PSARC 2008/395 iSER: iSCSI Extensions for RDMA
6702590 iSCSI initiator needs to support iSER transport
6702591 COMSTAR iSCSI port provider needs to support iSER transport
6797024 COMSTAR iscsit asserted at iscsit_login.c line: 679
6776635 panic[cpu0]/thread=ffffff000f874c60,assertion failed: 0, file: ../../common/io/idm/idm.c, line: 1465
6802232 Bad kernel fault at addr=0x0 from idm_crc32c call
--- a/usr/src/cmd/mdb/common/modules/idm/idm.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/cmd/mdb/common/modules/idm/idm.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -43,9 +43,14 @@
#define ISCSIT_TGT_SM_STRINGS
#define ISCSIT_SESS_SM_STRINGS
#define ISCSIT_LOGIN_SM_STRINGS
+#define ISCSI_SESS_SM_STRINGS
+#define ISCSI_CMD_SM_STRINGS
+#define ISCSI_ICS_NAMES
+#define ISCSI_LOGIN_STATE_NAMES
#include <sys/idm/idm.h>
#include <iscsit.h>
#include <iscsit_isns.h>
+#include <iscsi.h>
/*
* We want to be able to print multiple levels of object hierarchy with a
@@ -106,6 +111,7 @@
} iscsi_dcmd_ctrl_t;
static int iscsi_walk_all_sess(iscsi_dcmd_ctrl_t *idc);
+static int iscsi_walk_ini_sessions(uintptr_t array_addr);
static int iscsi_walk_all_conn(iscsi_dcmd_ctrl_t *idc);
static int iscsi_tgt_walk_cb(uintptr_t addr, const void *list_walker_data,
void *idc_void);
@@ -140,19 +146,25 @@
static int iscsi_isns(uintptr_t addr, uint_t flags, int argc,
const mdb_arg_t *argv);
-static const char *iscsi_idm_conn_event(int event);
-static const char *iscsi_iscsit_tgt_event(int event);
-static const char *iscsi_iscsit_sess_event(int event);
-static const char *iscsi_iscsit_login_event(int event);
-static const char *iscsi_idm_conn_state(int state);
-static const char *iscsi_idm_task_state(int state);
-static const char *iscsi_iscsit_tgt_state(int state);
-static const char *iscsi_iscsit_sess_state(int state);
-static const char *iscsi_iscsit_login_state(int state);
+static const char *iscsi_idm_conn_event(unsigned int event);
+static const char *iscsi_iscsit_tgt_event(unsigned int event);
+static const char *iscsi_iscsit_sess_event(unsigned int event);
+static const char *iscsi_iscsit_login_event(unsigned int event);
+static const char *iscsi_iscsi_cmd_event(unsigned int event);
+static const char *iscsi_iscsi_sess_event(unsigned int event);
+static const char *iscsi_idm_conn_state(unsigned int state);
+static const char *iscsi_idm_task_state(unsigned int state);
+static const char *iscsi_iscsit_tgt_state(unsigned int state);
+static const char *iscsi_iscsit_sess_state(unsigned int state);
+static const char *iscsi_iscsit_login_state(unsigned int state);
+static const char *iscsi_iscsi_cmd_state(unsigned int state);
+static const char *iscsi_iscsi_sess_state(unsigned int state);
+static const char *iscsi_iscsi_conn_state(unsigned int state);
+static const char *iscsi_iscsi_login_state(unsigned int state);
static void iscsi_format_timestamp(char *ts_str, int strlen,
timespec_t *ts);
-static char *inet_ntop(int af, const void *addr, char *buf, int addrlen);
+static char *iscsi_inet_ntop(int af, const void *addr, char *buf, int addrlen);
static void convert2ascii(char *, const in6_addr_t *);
static int sa_to_str(struct sockaddr_storage *sa, char *addr);
static int iscsi_isns_portal_cb(uintptr_t addr, const void *walker_data,
@@ -487,6 +499,59 @@
/*NOTREACHED*/
}
+/*
+ * iscsi_walk_ini_sessions -- list all the initiator sessions.
+ *
+ * array_vaddr is the target address of a struct i_ddi_soft_state (the
+ * caller passes the value of the "iscsi_state" variable).  The structure
+ * and its pointer array are copied into debugger memory, every non-NULL
+ * slot is read as an iscsi_hba_t, and that HBA's hba_sess_list chain is
+ * followed through sess_next, printing one line per iscsi_sess_t.
+ *
+ * Returns DCMD_OK once every slot has been visited, or DCMD_ERR as soon
+ * as a read of target memory fails.
+ */
+static int
+iscsi_walk_ini_sessions(uintptr_t array_vaddr)
+{
+	iscsi_hba_t		ihp;
+	int			i;
+	int			array_size;
+	struct i_ddi_soft_state	*ss;
+	iscsi_sess_t		*isp;
+
+	ss = (struct i_ddi_soft_state *)mdb_alloc(sizeof (*ss),
+	    UM_SLEEP|UM_GC);
+	if (mdb_vread(ss, sizeof (*ss), array_vaddr) != sizeof (*ss)) {
+		mdb_warn("Cannot read softstate struct (Invalid pointer?).\n");
+		return (DCMD_ERR);
+	}
+
+	/*
+	 * ss->array still holds a target address here; swap it for a
+	 * local copy of the pointer array so it can be indexed directly.
+	 */
+	array_size = ss->n_items * (sizeof (void *));
+	array_vaddr = (uintptr_t)ss->array;
+	ss->array = mdb_alloc(array_size, UM_SLEEP|UM_GC);
+	if (mdb_vread(ss->array, array_size, array_vaddr) != array_size) {
+		mdb_warn("Corrupted softstate struct.\n");
+		return (DCMD_ERR);
+	}
+
+	for (i = 0; i < ss->n_items; i++) {
+		/* Unused soft state slots are NULL. */
+		if (ss->array[i] == 0)
+			continue;
+
+		if (mdb_vread(&ihp, sizeof (ihp), (uintptr_t)ss->array[i])
+		    != sizeof (ihp)) {
+			mdb_warn("Corrupted softstate struct.\n");
+			return (DCMD_ERR);
+		}
+
+		/*
+		 * Print the HBA's target address.  ihp is the local
+		 * by-value copy of the structure and must not be handed
+		 * to %p.
+		 */
+		mdb_printf("iscsi_hba %p sessions: \n", ss->array[i]);
+		mdb_printf("%<u>%-19s %-4s %-8s%</u>\n",
+		    "Session", "Type", "State");
+		for (isp = ihp.hba_sess_list; isp; ) {
+			iscsi_sess_t	sess;
+
+			/* isp is a target address: copy it in first. */
+			if ((mdb_vread(&sess, sizeof (iscsi_sess_t),
+			    (uintptr_t)isp)) != sizeof (iscsi_sess_t)) {
+				mdb_warn("Failed to read session\n");
+				return (DCMD_ERR);
+			}
+			mdb_printf("%-19p %-4d %-8d\n", isp,
+			    sess.sess_type,
+			    sess.sess_state);
+			isp = sess.sess_next;
+		}
+	}
+	return (DCMD_OK);
+}
+
static int
iscsi_walk_all_sess(iscsi_dcmd_ctrl_t *idc)
{
@@ -494,7 +559,18 @@
uintptr_t avl_addr;
uintptr_t list_addr;
GElf_Sym sym;
+ uintptr_t adr;
+ /* Initiator sessions */
+ if (idc->idc_ini) {
+ if (mdb_readvar(&adr, "iscsi_state") == -1) {
+ mdb_warn("state variable iscsi_state not found.\n");
+ mdb_warn("Is the driver loaded ?\n");
+ return (DCMD_ERR);
+ }
+ return (iscsi_walk_ini_sessions(adr));
+ }
+ /* Target sessions */
/* Walk discovery sessions */
if (mdb_lookup_by_name("iscsit_global", &sym) == -1) {
mdb_warn("failed to find symbol 'iscsit_global'");
@@ -1719,6 +1795,18 @@
event_name =
iscsi_iscsit_login_event(sar->sar_event);
break;
+ case SAS_ISCSI_CMD:
+ state_name =
+ iscsi_iscsi_cmd_state(sar->sar_state);
+ event_name=
+ iscsi_iscsi_cmd_event(sar->sar_event);
+ break;
+ case SAS_ISCSI_SESS:
+ state_name =
+ iscsi_iscsi_sess_state(sar->sar_state);
+ event_name=
+ iscsi_iscsi_sess_event(sar->sar_event);
+ break;
default:
state_name = event_name = "N/A";
break;
@@ -1762,6 +1850,30 @@
iscsi_iscsit_login_state(
sar->sar_new_state);
break;
+ case SAS_ISCSI_CMD:
+ state_name =
+ iscsi_iscsi_cmd_state(sar->sar_state);
+ new_state_name=
+ iscsi_iscsi_cmd_state(sar->sar_new_state);
+ break;
+ case SAS_ISCSI_SESS:
+ state_name =
+ iscsi_iscsi_sess_state(sar->sar_state);
+ new_state_name=
+ iscsi_iscsi_sess_state(sar->sar_new_state);
+ break;
+ case SAS_ISCSI_CONN:
+ state_name =
+ iscsi_iscsi_conn_state(sar->sar_state);
+ new_state_name=
+ iscsi_iscsi_conn_state(sar->sar_new_state);
+ break;
+ case SAS_ISCSI_LOGIN:
+ state_name =
+ iscsi_iscsi_login_state(sar->sar_state);
+ new_state_name=
+ iscsi_iscsi_login_state(sar->sar_new_state);
+ break;
default:
break;
}
@@ -1782,101 +1894,100 @@
}
static const char *
-iscsi_idm_conn_event(int event)
+iscsi_idm_conn_event(unsigned int event)
{
- const char *name = "N/A";
-
- event = (event > CE_MAX_EVENT) ? CE_MAX_EVENT : event;
- name = idm_ce_name[event];
-
- return (name);
+ return ((event < CE_MAX_EVENT) ? idm_ce_name[event] : "N/A");
}
static const char *
-iscsi_iscsit_tgt_event(int event)
+iscsi_iscsit_tgt_event(unsigned int event)
{
- const char *name = "N/A";
+ return ((event < TE_MAX_EVENT) ? iscsit_te_name[event] : "N/A");
+}
- event = (event > TE_MAX_EVENT) ? TE_MAX_EVENT : event;
- name = iscsit_te_name[event];
-
- return (name);
+static const char *
+iscsi_iscsit_sess_event(unsigned int event)
+{
+ return ((event < SE_MAX_EVENT) ? iscsit_se_name[event] : "N/A");
}
static const char *
-iscsi_iscsit_sess_event(int event)
+iscsi_iscsit_login_event(unsigned int event)
{
- const char *name = "N/A";
-
- event = (event > SE_MAX_EVENT) ? SE_MAX_EVENT : event;
- name = iscsit_se_name[event];
-
- return (name);
+ return ((event < ILE_MAX_EVENT) ? iscsit_ile_name[event] : "N/A");
}
static const char *
-iscsi_iscsit_login_event(int event)
+iscsi_iscsi_cmd_event(unsigned int event)
{
- const char *name = "N/A";
-
- event = (event > ILE_MAX_EVENT) ? ILE_MAX_EVENT : event;
- name = iscsit_ile_name[event];
-
- return (name);
+ return ((event < ISCSI_CMD_EVENT_MAX) ?
+ iscsi_cmd_event_names[event] : "N/A");
}
static const char *
-iscsi_idm_conn_state(int state)
+iscsi_iscsi_sess_event(unsigned int event)
{
- const char *name = "N/A";
- state = (state > CS_MAX_STATE) ? CS_MAX_STATE : state;
- name = idm_cs_name[state];
+ return ((event < ISCSI_SESS_EVENT_MAX) ?
+ iscsi_sess_event_names[event] : "N/A");
+}
- return (name);
+static const char *
+iscsi_idm_conn_state(unsigned int state)
+{
+ return ((state < CS_MAX_STATE) ? idm_cs_name[state] : "N/A");
}
/*ARGSUSED*/
static const char *
-iscsi_idm_task_state(int state)
+iscsi_idm_task_state(unsigned int state)
{
- const char *name = "N/A";
- return (name);
+ return ("N/A");
}
static const char *
-iscsi_iscsit_tgt_state(int state)
+iscsi_iscsit_tgt_state(unsigned int state)
{
- const char *name = "N/A";
+ return ((state < TS_MAX_STATE) ? iscsit_ts_name[state] : "N/A");
+}
- state = (state > TS_MAX_STATE) ? TS_MAX_STATE : state;
- name = iscsit_ts_name[state];
+static const char *
+iscsi_iscsit_sess_state(unsigned int state)
+{
+ return ((state < SS_MAX_STATE) ? iscsit_ss_name[state] : "N/A");
+}
- return (name);
+static const char *
+iscsi_iscsit_login_state(unsigned int state)
+{
+ return ((state < ILS_MAX_STATE) ? iscsit_ils_name[state] : "N/A");
}
static const char *
-iscsi_iscsit_sess_state(int state)
+iscsi_iscsi_cmd_state(unsigned int state)
{
- const char *name = "N/A";
-
- state = (state > SS_MAX_STATE) ? SS_MAX_STATE : state;
- name = iscsit_ss_name[state];
-
- return (name);
+ return ((state < ISCSI_CMD_STATE_MAX) ?
+ iscsi_cmd_state_names[state] : "N/A");
}
static const char *
-iscsi_iscsit_login_state(int state)
+iscsi_iscsi_sess_state(unsigned int state)
{
- const char *name = "N/A";
-
- state = (state > ILS_MAX_STATE) ? ILS_MAX_STATE : state;
- name = iscsit_ils_name[state];
-
- return (name);
+ return ((state < ISCSI_SESS_STATE_MAX) ?
+ iscsi_sess_state_names[state] : "N/A");
}
+static const char *
+iscsi_iscsi_conn_state(unsigned int state)
+{
+ return ((state < ISCSI_CONN_STATE_MAX) ? iscsi_ics_name[state] : "N/A");
+}
+
+static const char *
+iscsi_iscsi_login_state(unsigned int state)
+{
+ return ((state < LOGIN_MAX) ? iscsi_login_state_names[state] : "N/A");
+}
/*
@@ -1915,7 +2026,7 @@
if (sa->ss_family == AF_INET) {
sin = (struct sockaddr_in *)sa;
- bufp = inet_ntop(AF_INET,
+ bufp = iscsi_inet_ntop(AF_INET,
(const void *)&(sin->sin_addr.s_addr),
buf, PORTAL_STR_LEN);
if (bufp == NULL) {
@@ -1925,7 +2036,7 @@
} else if (sa->ss_family == AF_INET6) {
strlcat(buf, "[", sizeof (buf));
sin6 = (struct sockaddr_in6 *)sa;
- bufp = inet_ntop(AF_INET6,
+ bufp = iscsi_inet_ntop(AF_INET6,
(const void *)&sin6->sin6_addr.s6_addr,
&buf[1], PORTAL_STR_LEN - 1);
if (bufp == NULL) {
@@ -2262,14 +2373,14 @@
}
/*
- * inet_ntop -- Convert an IPv4 or IPv6 address in binary form into
+ * iscsi_inet_ntop -- Convert an IPv4 or IPv6 address in binary form into
* printable form, and return a pointer to that string. Caller should
* provide a buffer of correct length to store string into.
* Note: this routine is kernel version of inet_ntop. It has similar
* format as inet_ntop() defined in rfc2553. But it does not do
* error handling operations exactly as rfc2553 defines. This function
* is used by kernel inet directory routines only for debugging.
- * This inet_ntop() function, does not return NULL if third argument
+ * This iscsi_inet_ntop() function, does not return NULL if third argument
* is NULL. The reason is simple that we don't want kernel to panic
* as the output of this function is directly fed to ip<n>dbg macro.
* Instead it uses a local buffer for destination address for
@@ -2287,7 +2398,7 @@
#endif
char *
-inet_ntop(int af, const void *addr, char *buf, int addrlen)
+iscsi_inet_ntop(int af, const void *addr, char *buf, int addrlen)
{
static char local_buf[PORTAL_STR_LEN];
static char *err_buf1 = "<badaddr>";
@@ -2297,7 +2408,7 @@
char *caddr;
/*
- * We don't allow thread unsafe inet_ntop calls, they
+ * We don't allow thread unsafe iscsi_inet_ntop calls, they
* must pass a non-null buffer pointer. For DEBUG mode
* we use the ASSERT() and for non-debug kernel it will
* silently allow it for now. Someday we should remove
--- a/usr/src/cmd/mdb/intel/amd64/idm/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/cmd/mdb/intel/amd64/idm/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -29,6 +29,7 @@
MODSRCS = idm.c
ISCSITBASE = ../../../../../uts/common/io/comstar/port/iscsit
+ISCSIBASE = ../../../../../uts/common/io/scsi/adapters/iscsi
include ../../../../Makefile.cmd
include ../../../../Makefile.cmd.64
@@ -36,3 +37,4 @@
include ../../../Makefile.module
CPPFLAGS += -I$(ISCSITBASE)
+CPPFLAGS += -I$(ISCSIBASE)
--- a/usr/src/cmd/mdb/intel/ia32/idm/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/cmd/mdb/intel/ia32/idm/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -29,9 +29,11 @@
MODSRCS = idm.c
ISCSITBASE = ../../../../../uts/common/io/comstar/port/iscsit
+ISCSIBASE = ../../../../../uts/common/io/scsi/adapters/iscsi
include ../../../../Makefile.cmd
include ../../Makefile.ia32
include ../../../Makefile.module
CPPFLAGS += -I$(ISCSITBASE)
+CPPFLAGS += -I$(ISCSIBASE)
--- a/usr/src/cmd/mdb/sparc/v9/idm/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/cmd/mdb/sparc/v9/idm/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -29,6 +29,7 @@
MODSRCS = idm.c
ISCSITBASE = ../../../../../uts/common/io/comstar/port/iscsit
+ISCSIBASE = ../../../../../uts/common/io/scsi/adapters/iscsi
include ../../../../Makefile.cmd
include ../../../../Makefile.cmd.64
@@ -36,3 +37,4 @@
include ../../../Makefile.module
CPPFLAGS += -I$(ISCSITBASE)
+CPPFLAGS += -I$(ISCSIBASE)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWiscsidmr/postinstall Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,51 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+PATH="/usr/bin:/usr/sbin:${PATH}"; export PATH
+
+IB_DEV_DIR="/devices/ib"; export IB_DEV_DIR
+
+# Driver name and the minor-node permission entry passed to add_drv -m
+DRVR_NAME=iser; export DRVR_NAME
+DRVR_PERM='* 0600 root sys'; export DRVR_PERM
+
+if [ "${BASEDIR}" = "/" ]; then
+
+ # Is there IB hardware? Probe for the ${IB_DEV_DIR} directory.
+ if [ ! -d $IB_DEV_DIR ]
+ then
+ # On a system with no IB hardware, modify the system files only
+ # (add_drv -n). If IB hardware is later hotplugged in, any
+ # application opening the "iser" device node will invoke load and
+ # attach of the iser driver.
+ add_drv -n -m "${DRVR_PERM}" ${DRVR_NAME}
+ else
+ add_drv -m "${DRVR_PERM}" ${DRVR_NAME}
+ fi
+else
+ add_drv -b "${BASEDIR}" -m "${DRVR_PERM}" ${DRVR_NAME}
+fi
+
+exit 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWiscsidmr/preremove Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,34 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+PATH="/usr/bin:/usr/sbin:${PATH}"
+export PATH
+
+DRVR_NAME=iser
+
+# Remove the driver entries but leave it attached.
+/usr/sbin/rem_drv -b "${BASEDIR}" ${DRVR_NAME}
+
+exit 0
--- a/usr/src/pkgdefs/SUNWiscsidmr/prototype_com Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/pkgdefs/SUNWiscsidmr/prototype_com Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -38,9 +38,14 @@
i copyright
i pkginfo
i depend
+i postinstall
+i preremove
+
#
# SUNWiscsidmr files
#
d none kernel 0755 root sys
+d none kernel/drv 0755 root sys
+f none kernel/drv/iser.conf 0644 root sys
d none kernel/misc 0755 root sys
d none kernel/kmdb 0755 root sys
--- a/usr/src/pkgdefs/SUNWiscsidmr/prototype_i386 Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/pkgdefs/SUNWiscsidmr/prototype_i386 Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -49,6 +49,9 @@
f none kernel/misc/idm 0755 root sys
d none kernel/misc/amd64 0755 root sys
f none kernel/misc/amd64/idm 0755 root sys
+f none kernel/drv/iser 0755 root sys
+d none kernel/drv/amd64 0755 root sys
+f none kernel/drv/amd64/iser 0755 root sys
f none kernel/kmdb/idm 0555 root sys
d none kernel/kmdb/amd64 0755 root sys
f none kernel/kmdb/amd64/idm 0555 root sys
--- a/usr/src/pkgdefs/SUNWiscsidmr/prototype_sparc Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/pkgdefs/SUNWiscsidmr/prototype_sparc Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
@@ -47,5 +47,7 @@
#
d none kernel/misc/sparcv9 0755 root sys
f none kernel/misc/sparcv9/idm 0755 root sys
+d none kernel/drv/sparcv9 0755 root sys
+f none kernel/drv/sparcv9/iser 0755 root sys
d none kernel/kmdb/sparcv9 0755 root sys
f none kernel/kmdb/sparcv9/idm 0555 root sys
--- a/usr/src/pkgdefs/SUNWiscsir/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/pkgdefs/SUNWiscsir/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -19,17 +19,17 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
include ../Makefile.com
-DATAFILES += depend i.iscsiconf i.manifest r.manifest
+DATAFILES += i.iscsiconf i.manifest r.manifest
.KEEP_STATE:
-all: $(FILES) preremove postinstall
+all: $(FILES) depend preremove postinstall
install: all pkg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWiscsir/depend Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,49 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This package information file defines software dependencies associated
+# with the pkg. You can define three types of pkg dependencies with this file:
+# P indicates a prerequisite for installation
+# I indicates an incompatible package
+# R indicates a reverse dependency
+# <pkg.abbr> see pkginfo(4), PKG parameter
+# <name> see pkginfo(4), NAME parameter
+# <version> see pkginfo(4), VERSION parameter
+# <arch> see pkginfo(4), ARCH parameter
+# <type> <pkg.abbr> <name>
+# (<arch>)<version>
+# (<arch>)<version>
+# ...
+# <type> <pkg.abbr> <name>
+# ...
+#
+
+P SUNWcar Core Architecture, (Root)
+P SUNWcakr Core Solaris Kernel Architecture (Root)
+P SUNWkvm Core Architecture, (Kvm)
+P SUNWcsr Core Solaris, (Root)
+P SUNWckr Core Solaris Kernel (Root)
+P SUNWcnetr Core Solaris Network Infrastructure (Root)
+P SUNWcsu Core Solaris, (Usr)
+P SUNWcsd Core Solaris Devices
+P SUNWcsl Core Solaris Libraries
+P SUNWiscsidmr Sun iSCSI Data Mover (Root)
--- a/usr/src/uts/common/Makefile.files Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/Makefile.files Tue Mar 24 17:50:49 2009 -0600
@@ -556,6 +556,9 @@
RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \
rdsib_debug.o rdsib_sc.o
+ISER_OBJS += iser.o iser_cm.o iser_cq.o iser_ib.o iser_idm.o \
+ iser_resource.o iser_xfer.o
+
UDP_OBJS += udpddi.o
UDP6_OBJS += udp6ddi.o
--- a/usr/src/uts/common/Makefile.rules Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/Makefile.rules Tue Mar 24 17:50:49 2009 -0600
@@ -637,6 +637,10 @@
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ib/clients/iser/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ib/clients/ibd/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1730,6 +1734,9 @@
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ib/clients/rds/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ib/clients/iser/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ib/clients/ibd/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c Tue Mar 24 17:50:49 2009 -0600
@@ -971,6 +971,13 @@
iscsit_conn_t *ict;
/*
+ * We need to get a global hold here to ensure that the service
+ * doesn't get shutdown prior to establishing a session. This
+ * gets released in iscsit_conn_destroy().
+ */
+ iscsit_global_hold();
+
+ /*
* Allocate an associated iscsit structure to represent this
* connection. We shouldn't really create a session until we
* get the first login PDU.
@@ -1129,6 +1136,8 @@
idm_refcnt_destroy(&ict->ict_refcnt);
kmem_free(ict, sizeof (*ict));
+ iscsit_global_rele();
+
return (IDM_STATUS_SUCCESS);
}
@@ -1422,6 +1431,13 @@
hton24(rsp->dlength, resp_datalen);
}
+ DTRACE_PROBE5(iscsi__scsi__response,
+ iscsit_conn_t *, itask->it_ict,
+ uint8_t, rsp->response,
+ uint8_t, rsp->cmd_status,
+ idm_pdu_t *, pdu,
+ scsi_task_t *, task);
+
iscsit_pdu_tx(pdu);
return (STMF_SUCCESS);
@@ -1830,6 +1846,8 @@
idm_pdu_t *rsp_pdu;
idm_conn_t *ic;
iscsi_scsi_rsp_hdr_t *resp;
+ iscsi_scsi_cmd_hdr_t *req =
+ (iscsi_scsi_cmd_hdr_t *)rx_pdu->isp_hdr;
ic = ict->ict_ic;
@@ -1841,7 +1859,20 @@
resp->flags = ISCSI_FLAG_FINAL;
resp->response = response;
resp->cmd_status = cmd_status;
- resp->itt = rx_pdu->isp_hdr->itt;
+ resp->itt = req->itt;
+ if ((response == ISCSI_STATUS_CMD_COMPLETED) &&
+ (req->data_length != 0) &&
+ ((req->flags & ISCSI_FLAG_CMD_READ) ||
+ (req->flags & ISCSI_FLAG_CMD_WRITE))) {
+ resp->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+ resp->residual_count = req->data_length;
+ }
+
+ DTRACE_PROBE4(iscsi__scsi__direct__response,
+ iscsit_conn_t *, ict,
+ uint8_t, resp->response,
+ uint8_t, resp->cmd_status,
+ idm_pdu_t *, rsp_pdu);
iscsit_pdu_tx(rsp_pdu);
}
@@ -1853,6 +1884,11 @@
tm_resp = (iscsi_scsi_task_mgt_rsp_hdr_t *)tm_resp_pdu->isp_hdr;
tm_resp->response = tm_status;
+
+ DTRACE_PROBE3(iscsi__scsi__tm__response,
+ iscsit_conn_t *, tm_resp_pdu->isp_ic->ic_handle,
+ uint8_t, tm_resp->response,
+ idm_pdu_t *, tm_resp_pdu);
iscsit_pdu_tx(tm_resp_pdu);
}
@@ -1889,6 +1925,11 @@
/*
* Figure out what we're being asked to do.
*/
+ DTRACE_PROBE4(iscsi__scsi__tm__request,
+ iscsit_conn_t *, ict,
+ uint8_t, (iscsi_tm->function & ISCSI_FLAG_TASK_MGMT_FUNCTION_MASK),
+ uint32_t, iscsi_tm->rtt,
+ idm_pdu_t *, rx_pdu);
switch (iscsi_tm->function & ISCSI_FLAG_TASK_MGMT_FUNCTION_MASK) {
case ISCSI_TM_FUNC_ABORT_TASK:
/*
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ISCSIT_H_
@@ -204,22 +204,22 @@
* iSCSI Auth Information
*/
typedef struct conn_auth {
- char ca_tgt_chapuser[iscsiAuthStringMaxLength];
- uint8_t ca_tgt_chapsecret[iscsiAuthStringMaxLength];
+ char ca_tgt_chapuser[iscsitAuthStringMaxLength];
+ uint8_t ca_tgt_chapsecret[iscsitAuthStringMaxLength];
int ca_tgt_chapsecretlen;
- char ca_ini_chapuser[iscsiAuthStringMaxLength];
- uint8_t ca_ini_chapsecret[iscsiAuthStringMaxLength];
+ char ca_ini_chapuser[iscsitAuthStringMaxLength];
+ uint8_t ca_ini_chapsecret[iscsitAuthStringMaxLength];
int ca_ini_chapsecretlen;
/* RADIUS authentication information */
boolean_t ca_use_radius;
struct sockaddr_storage ca_radius_server;
- uint8_t ca_radius_secret[iscsiAuthStringMaxLength];
+ uint8_t ca_radius_secret[iscsitAuthStringMaxLength];
int ca_radius_secretlen;
/* authentication method list */
- iscsit_auth_method_t ca_method_valid_list[iscsiAuthMethodMaxCount];
+ iscsit_auth_method_t ca_method_valid_list[iscsitAuthMethodMaxCount];
/* Target alias */
char ca_tgt_alias[MAX_ISCSI_NODENAMELEN];
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_auth.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_auth.c Tue Mar 24 17:50:49 2009 -0600
@@ -676,7 +676,7 @@
}
bin = &(client->auth_send_binary_block.largeBinary[0]);
- len = iscsiAuthChapResponseLength;
+ len = iscsitAuthChapResponseLength;
auth_random_set_data(bin, len);
client_set_binary_data(&client->sendKeyBlock,
AKT_CHAP_C,
@@ -707,7 +707,7 @@
uint32_t chap_id;
unsigned char *chap_challenge;
unsigned int challenge_len;
- uchar_t resp[iscsiAuthChapResponseLength];
+ uchar_t resp[iscsitAuthChapResponseLength];
tgt_username = auth->ca_tgt_chapuser;
tgt_password = auth->ca_tgt_chapsecret;
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_authclient.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_authclient.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -174,7 +174,7 @@
uchar_t *chap_c, unsigned int challenge_len,
uchar_t *chap_r, unsigned int resp_len)
{
- uchar_t verifyData[iscsiAuthChapResponseLength];
+ uchar_t verifyData[iscsitAuthChapResponseLength];
conn_auth_t *auth = &lsm->icl_auth;
/* Check if RADIUS access is enabled */
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_authclient.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_authclient.h Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ISCSIT_AUTHCLIENT_H_
@@ -28,17 +28,17 @@
#define ISCSI_AUTH_PASSED 0
#define ISCSI_AUTH_FAILED 1
-enum { iscsiAuthStringMaxLength = 256 };
+enum { iscsitAuthStringMaxLength = 256 };
enum { AuthStringMaxLength = 256 };
enum { AuthStringBlockMaxLength = 1024 };
enum { AuthLargeBinaryMaxLength = 1024 };
-enum { iscsiAuthChapResponseLength = 16 };
+enum { iscsitAuthChapResponseLength = 16 };
-enum { iscsiAuthMethodMaxCount = 2 };
+enum { iscsitAuthMethodMaxCount = 2 };
-enum { iscsiAuthChapAlgorithmMd5 = 5 };
+enum { iscsitAuthChapAlgorithmMd5 = 5 };
enum {
AKT_CHAP_A = 0,
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c Tue Mar 24 17:50:49 2009 -0600
@@ -348,6 +348,15 @@
SAS_ISCSIT_LOGIN, (int)lsm->icl_login_state,
(int)ctx->le_ctx_event, (uintptr_t)pdu);
+ /*
+ * If the lsm is in a terminal state, just drain
+ * any remaining events.
+ */
+ if ((lsm->icl_login_state == ILS_LOGIN_ERROR) ||
+ (lsm->icl_login_state == ILS_LOGIN_DONE)) {
+ kmem_free(ctx, sizeof (*ctx));
+ continue;
+ }
mutex_exit(&lsm->icl_mutex);
login_sm_event_dispatch(lsm, ict, ctx);
mutex_enter(&lsm->icl_mutex);
@@ -414,6 +423,7 @@
login_sm_build_login_response(ict);
login_sm_send_next_response(ict);
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
+ kmem_free(ctx, sizeof (*ctx));
return;
}
break;
@@ -1538,19 +1548,19 @@
if (strcmp(chapuser, "") == 0) {
(void) strlcpy(lsm->icl_auth.ca_ini_chapuser,
lsm->icl_initiator_name,
- min(iscsiAuthStringMaxLength, MAX_ISCSI_NODENAMELEN));
+ min(iscsitAuthStringMaxLength, MAX_ISCSI_NODENAMELEN));
} else {
(void) strlcpy(lsm->icl_auth.ca_ini_chapuser, chapuser,
- iscsiAuthStringMaxLength);
+ iscsitAuthStringMaxLength);
}
if ((lsm->icl_target_name != NULL) &&
(strcmp(targetchapuser, "") == 0)) {
(void) strlcpy(lsm->icl_auth.ca_tgt_chapuser,
lsm->icl_target_name,
- min(iscsiAuthStringMaxLength, MAX_ISCSI_NODENAMELEN));
+ min(iscsitAuthStringMaxLength, MAX_ISCSI_NODENAMELEN));
} else {
(void) strlcpy(lsm->icl_auth.ca_tgt_chapuser,
- targetchapuser, iscsiAuthStringMaxLength);
+ targetchapuser, iscsitAuthStringMaxLength);
}
/*
@@ -1561,8 +1571,8 @@
lsm->icl_auth.ca_ini_chapsecretlen = 0;
} else {
if (iscsi_base64_str_to_binary(chapsecret,
- strnlen(chapsecret, iscsiAuthStringMaxLength),
- lsm->icl_auth.ca_ini_chapsecret, iscsiAuthStringMaxLength,
+ strnlen(chapsecret, iscsitAuthStringMaxLength),
+ lsm->icl_auth.ca_ini_chapsecret, iscsitAuthStringMaxLength,
&lsm->icl_auth.ca_ini_chapsecretlen) != 0) {
cmn_err(CE_WARN, "Corrupted CHAP secret"
" for initiator %s", lsm->icl_initiator_name);
@@ -1573,8 +1583,8 @@
lsm->icl_auth.ca_tgt_chapsecretlen = 0;
} else {
if (iscsi_base64_str_to_binary(targetchapsecret,
- strnlen(targetchapsecret, iscsiAuthStringMaxLength),
- lsm->icl_auth.ca_tgt_chapsecret, iscsiAuthStringMaxLength,
+ strnlen(targetchapsecret, iscsitAuthStringMaxLength),
+ lsm->icl_auth.ca_tgt_chapsecret, iscsitAuthStringMaxLength,
&lsm->icl_auth.ca_tgt_chapsecretlen) != 0) {
cmn_err(CE_WARN, "Corrupted CHAP secret"
" for target %s", lsm->icl_target_name);
@@ -1585,8 +1595,8 @@
lsm->icl_auth.ca_radius_secretlen = 0;
} else {
if (iscsi_base64_str_to_binary(radiussecret,
- strnlen(radiussecret, iscsiAuthStringMaxLength),
- lsm->icl_auth.ca_radius_secret, iscsiAuthStringMaxLength,
+ strnlen(radiussecret, iscsitAuthStringMaxLength),
+ lsm->icl_auth.ca_radius_secret, iscsitAuthStringMaxLength,
&lsm->icl_auth.ca_radius_secretlen) != 0) {
cmn_err(CE_WARN, "Corrupted RADIUS secret");
lsm->icl_auth.ca_radius_secretlen = 0;
@@ -2428,11 +2438,9 @@
boolean_t boolean_val;
uint64_t uint64_val;
int nvrc;
- idm_status_t idmrc;
/* Let the IDM level activate its parameters first */
- idmrc = idm_notice_key_values(ict->ict_ic, lsm->icl_negotiated_values);
- ASSERT(idmrc == IDM_STATUS_SUCCESS);
+ idm_notice_key_values(ict->ict_ic, lsm->icl_negotiated_values);
/*
* Initiator alias and target alias
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -425,7 +425,7 @@
list_insert_tail(&ist->ist_events, ctx);
/*
- * Use the icl_busy flag to keep the state machine single threaded.
+ * Use the ist_sm_busy flag to keep the state machine single threaded.
* This also serves as recursion avoidance since this flag will
* always be set if we call login_sm_event from within the
* state machine code.
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_tgt.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_tgt.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -189,10 +189,10 @@
list_insert_tail(&tgt->target_events, ctx);
/*
- * Use the icl_busy flag to keep the state machine single threaded.
- * This also serves as recursion avoidance since this flag will
- * always be set if we call iscsit_tgt_sm_event from within the
- * state machine code.
+ * Use the target_sm_busy flag to keep the state machine single
+ * threaded. This also serves as recursion avoidance since this
+ * flag will always be set if we call iscsit_tgt_sm_event from
+ * within the state machine code.
*/
if (!tgt->target_sm_busy) {
tgt->target_sm_busy = B_TRUE;
@@ -888,7 +888,11 @@
idm_refcnt_init(&result->target_sess_refcnt, result);
/* Finish initializing local port */
- lport->lport_abort_timeout = 0xffffffff; /* seconds */
+ /*
+ * Would like infinite timeout, but this is about as long as can
+ * be specified to stmf on a 32 bit kernel.
+ */
+ lport->lport_abort_timeout = 2000; /* seconds */
lport->lport_id = result->target_devid;
lport->lport_pp = iscsit_global.global_pp;
lport->lport_ds = iscsit_global.global_dbuf_store;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,436 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <sys/ib/clients/iser/iser.h>
+
+/*
+ * iser.c
+ * DDI and core routines for Solaris iSER implementation.
+ */
+
+iser_state_t *iser_state = NULL; /* global state */
+ddi_taskq_t *iser_taskq = NULL; /* global taskq */
+
+/* set B_TRUE for console logging */
+boolean_t iser_logging = B_FALSE;
+
+/* Driver functions */
+static int iser_attach(dev_info_t *, ddi_attach_cmd_t);
+static int iser_detach(dev_info_t *, ddi_detach_cmd_t);
+static int iser_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int iser_open(dev_t *, int, int, cred_t *);
+static int iser_close(dev_t, int, int, cred_t *);
+static int iser_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+/* static int iser_close(dev_t, int, int, cred_t *); */
+
+/*
+ * Char/Block operations.  Only open, close and ioctl are implemented by
+ * this driver; the remaining I/O entry points are nodev/nochpoll stubs.
+ */
+static struct cb_ops iser_cb_ops = {
+ iser_open, /* open */
+ iser_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ iser_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* stream */
+ D_MP, /* cb_flag */
+ CB_REV, /* rev */
+ nodev, /* int (*cb_aread)() */
+ nodev, /* int (*cb_awrite)() */
+};
+
+/*
+ * Device operations.  getinfo, attach and detach are the iser_* routines
+ * declared above; no bus or power operations are provided.
+ */
+static struct dev_ops iser_ops = {
+ DEVO_REV, /* devo_rev, */
+ 0, /* refcnt */
+ iser_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ iser_attach, /* attach */
+ iser_detach, /* detach */
+ nodev, /* reset */
+ &iser_cb_ops, /* cb_ops */
+ NULL, /* bus ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed /* quiesce */
+};
+
+/* Module Driver Info: describes this module as a driver using iser_ops */
+#define ISER_NAME_VERSION "iSCSI Extensions for RDMA"
+static struct modldrv iser_modldrv = {
+ &mod_driverops,
+ ISER_NAME_VERSION,
+ &iser_ops,
+};
+
+/* Module Linkage: handed to mod_install(), mod_info() and mod_remove() */
+static struct modlinkage iser_modlinkage = {
+ MODREV_1,
+ &iser_modldrv,
+ NULL
+};
+
+/*
+ * _init()
+ * Loadable-module entry point.  Allocates the zeroed global iser_state and
+ * installs the module; if mod_install() fails the state is freed again.
+ * Returns the mod_install() status.
+ */
+int
+_init(void)
+{
+	int	rc;
+
+	iser_state = kmem_zalloc(sizeof (*iser_state), KM_SLEEP);
+
+	rc = mod_install(&iser_modlinkage);
+	if (rc == DDI_SUCCESS) {
+		return (rc);
+	}
+
+	/* Installation failed: give the global state back */
+	kmem_free(iser_state, sizeof (*iser_state));
+	return (rc);
+}
+
+/*
+ * _info()
+ * Loadable-module entry point; returns whatever mod_info() reports for
+ * iser_modlinkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&iser_modlinkage, modinfop));
+}
+
+/*
+ * _fini()
+ * Loadable-module entry point.  Removes the module and, only if that
+ * succeeds, frees the global iser_state.  Returns the mod_remove() status.
+ */
+int
+_fini(void)
+{
+	int	rc = mod_remove(&iser_modlinkage);
+
+	if (rc == DDI_SUCCESS) {
+		/* Nothing can reach the global state any more */
+		kmem_free(iser_state, sizeof (*iser_state));
+	}
+
+	return (rc);
+}
+
+/*
+ * iser_attach()
+ * DDI attach entry point.  For DDI_ATTACH this records the dev_info and
+ * instance in the global iser_state, initializes the open refcnt and its
+ * lock, creates the global taskq, initializes the IB layer and creates the
+ * minor node.  Each failure undoes the steps already completed and returns
+ * DDI_FAILURE.  DDI_RESUME and any other command are rejected with
+ * DDI_FAILURE.
+ */
+static int
+iser_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	/* Looked up before the switch: the default arm logs it as well */
+	int	instance = ddi_get_instance(dip);
+	int	status;
+
+	switch (cmd) {
+	case DDI_ATTACH:
+		ISER_LOG(CE_CONT, "iser_attach: DDI_ATTACH");
+
+		iser_state->is_dip = dip;
+		iser_state->is_instance = instance;
+
+		/* Initialize the open refcnt and its lock */
+		iser_state->is_open_refcnt = 0;
+		mutex_init(&iser_state->is_refcnt_lock, NULL, MUTEX_DRIVER,
+		    NULL);
+
+		iser_taskq = ddi_taskq_create(dip, "iser_taskq",
+		    ISER_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0);
+
+		if (iser_taskq == NULL) {
+			ISER_LOG(CE_CONT, "%s%d: failed to create taskq",
+			    "iser", instance);
+			mutex_destroy(&iser_state->is_refcnt_lock);
+			return (DDI_FAILURE);
+		}
+
+		/* initialize iSER as IB service */
+		status = iser_ib_init();
+		if (status != DDI_SUCCESS) {
+			ddi_taskq_destroy(iser_taskq);
+			mutex_destroy(&iser_state->is_refcnt_lock);
+			ISER_LOG(CE_CONT, "%s%d: failed to initialize IB",
+			    "iser", instance);
+			return (DDI_FAILURE);
+		}
+
+		status = ddi_create_minor_node(
+		    dip, ddi_get_name(dip), S_IFCHR, instance,
+		    DDI_PSEUDO, 0);
+		if (status != DDI_SUCCESS) {
+			(void) iser_ib_fini();
+			ddi_taskq_destroy(iser_taskq);
+			mutex_destroy(&iser_state->is_refcnt_lock);
+			ISER_LOG(CE_CONT, "%s%d: failed ddi_create_minor_node",
+			    "iser", instance);
+			return (DDI_FAILURE);
+		}
+
+		ddi_report_dev(dip);
+
+		return (DDI_SUCCESS);
+
+	case DDI_RESUME:
+		ISER_LOG(CE_CONT, "iser_attach: DDI_RESUME unsupported");
+		return (DDI_FAILURE);
+
+	default:
+		ISER_LOG(CE_CONT, "%s%d: unknown cmd in attach (0x%x)", "iser",
+		    instance, cmd);
+		return (DDI_FAILURE);
+	}
+}
+
+/*
+ * iser_detach()
+ * DDI detach entry point.  Fails while the device is still held open.
+ * For DDI_DETACH the IB layer is shut down, the global taskq destroyed and
+ * the minor node removed; only after that is the refcnt lock destroyed.
+ * DDI_SUSPEND and unknown commands return DDI_FAILURE and leave the
+ * instance, including its lock, fully usable.
+ */
+static int
+iser_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	/* Refuse to go away while the device node is still held open */
+	mutex_enter(&iser_state->is_refcnt_lock);
+	if (iser_state->is_open_refcnt > 0) {
+		mutex_exit(&iser_state->is_refcnt_lock);
+		return (DDI_FAILURE);
+	}
+	mutex_exit(&iser_state->is_refcnt_lock);
+
+	switch (cmd) {
+	case DDI_DETACH:
+		ISER_LOG(CE_CONT, "iser_detach: DDI_DETACH");
+
+		if (iser_ib_fini() != DDI_SUCCESS) {
+			ISER_LOG(CE_CONT, "iser_ib_fini failed");
+			return (DDI_FAILURE);
+		}
+
+		if (iser_taskq != NULL) {
+			ddi_taskq_destroy(iser_taskq);
+			iser_taskq = NULL;
+		}
+		ddi_remove_minor_node(dip, NULL);
+
+		/*
+		 * Destroy the lock only once the detach can no longer fail.
+		 * On every failure path the instance stays attached, and
+		 * iser_open()/iser_close() still take this lock.
+		 */
+		mutex_destroy(&iser_state->is_refcnt_lock);
+
+		return (DDI_SUCCESS);
+
+	case DDI_SUSPEND:
+		ISER_LOG(CE_CONT, "iser_detach: DDI_SUSPEND unsupported");
+		return (DDI_FAILURE);
+
+	default:
+		ISER_LOG(CE_CONT, "iser: unknown cmd in detach (0x%x)", cmd);
+		return (DDI_FAILURE);
+	}
+}
+
+/*
+ * iser_getinfo()
+ * DDI getinfo entry point.  DDI_INFO_DEVT2DEVINFO returns the dev_info
+ * pointer saved in iser_state; DDI_INFO_DEVT2INSTANCE stores NULL in
+ * *result (NOTE(review): presumably standing for instance 0 -- confirm the
+ * driver is single-instance).  Any other command returns DDI_FAILURE.
+ */
+/* ARGSUSED */
+static int
+iser_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)iser_state->is_dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = NULL;
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+}
+
+/*
+ * iser_open()
+ * Registers the iSER transport with IDM, then counts the open.  Fails with
+ * ENXIO, leaving the refcnt untouched, when the registration is refused.
+ */
+/* ARGSUSED */
+static int
+iser_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+	minor_t	minor = getminor(*devp);
+
+	/* Register the transport with IDM */
+	if (iser_idm_register() != DDI_SUCCESS) {
+		ISER_LOG(CE_CONT, "%s%d: failed to register with IDM",
+		    "iser", minor);
+		return (ENXIO);
+	}
+
+	/* Increment our open refcnt */
+	mutex_enter(&iser_state->is_refcnt_lock);
+	++iser_state->is_open_refcnt;
+	mutex_exit(&iser_state->is_refcnt_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * iser_close()
+ * Drops one open reference on the driver and returns DDI_SUCCESS.
+ * NOTE(review): close(9E) is normally invoked only on the last close of a
+ * minor while open(9E) runs for every open, so the count taken in
+ * iser_open() may not balance -- confirm.
+ */
+/* ARGSUSED */
+static int
+iser_close(dev_t devp, int flag, int otyp, cred_t *credp)
+{
+	mutex_enter(&iser_state->is_refcnt_lock);
+	/* Checked under the lock so the read cannot race with an update */
+	ASSERT(iser_state->is_open_refcnt != 0);
+	iser_state->is_open_refcnt--;
+	mutex_exit(&iser_state->is_refcnt_lock);
+
+	return (DDI_SUCCESS);
+}
+
+iser_status_t
+iser_register_service(idm_svc_t *idm_svc)
+{
+ /* Pass-through: returns the status of iser_ib_register_service() */
+ return (iser_ib_register_service(idm_svc));
+}
+
+iser_status_t
+iser_bind_service(idm_svc_t *idm_svc)
+{
+ /* Pass-through: returns the status of iser_ib_bind_service() */
+ return (iser_ib_bind_service(idm_svc));
+}
+
+void
+iser_unbind_service(idm_svc_t *idm_svc)
+{
+ /* Pass-through to the IB layer; there is no status to report */
+ iser_ib_unbind_service(idm_svc);
+}
+
+void
+iser_deregister_service(idm_svc_t *idm_svc)
+{
+ /* Pass-through to the IB layer; there is no status to report */
+ iser_ib_deregister_service(idm_svc);
+}
+
+/*
+ * iser_path_exists
+ * Reports whether an iSER path can be found between two endpoints.  Both
+ * socket addresses are converted to IBT IP addresses and handed to
+ * iser_ib_get_paths(); the path it fills in is discarded and only success
+ * or failure is reported, as B_TRUE or B_FALSE.
+ *
+ * To use an implicit source, a value of NULL is allowed for laddr.
+ * NOTE(review): laddr is passed to iser_ib_conv_sockaddr2ibtaddr()
+ * unchecked, so that helper must accept NULL -- confirm.
+ */
+boolean_t
+iser_path_exists(idm_sockaddr_t *laddr, idm_sockaddr_t *raddr)
+{
+	ibt_ip_addr_t	dst_ip, src_ip;
+	ibt_path_info_t	found;
+
+	iser_ib_conv_sockaddr2ibtaddr(raddr, &dst_ip);
+	iser_ib_conv_sockaddr2ibtaddr(laddr, &src_ip);
+
+	if (iser_ib_get_paths(&src_ip, &dst_ip, &found, NULL) != IBT_SUCCESS) {
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * iser_channel_alloc
+ * This function allocates a reliable communication channel between the
+ * given endpoints.  Both socket addresses are converted to IBT IP addresses
+ * and the result of iser_ib_alloc_rc_channel() is returned unchanged.
+ */
+iser_chan_t *
+iser_channel_alloc(idm_sockaddr_t *laddr, idm_sockaddr_t *raddr)
+{
+ ibt_ip_addr_t remote_ip, local_ip;
+
+ iser_ib_conv_sockaddr2ibtaddr(raddr, &remote_ip);
+ iser_ib_conv_sockaddr2ibtaddr(laddr, &local_ip);
+
+ return (iser_ib_alloc_rc_channel(&local_ip, &remote_ip));
+}
+
+/*
+ * iser_channel_open
+ * This function opens the already allocated communication channel between the
+ * two endpoints and returns the status of iser_ib_open_rc_channel().
+ */
+iser_status_t
+iser_channel_open(iser_chan_t *chan)
+{
+ return (iser_ib_open_rc_channel(chan));
+}
+
+/*
+ * iser_channel_close
+ * This function closes the already opened communication channel between the
+ * two endpoints by handing it to iser_ib_close_rc_channel().
+ */
+void
+iser_channel_close(iser_chan_t *chan)
+{
+ iser_ib_close_rc_channel(chan);
+}
+
+/*
+ * iser_channel_free
+ * This function frees the channel between the given endpoints by handing
+ * it to iser_ib_free_rc_channel().
+ */
+void
+iser_channel_free(iser_chan_t *chan)
+{
+ iser_ib_free_rc_channel(chan);
+}
+
+/* ARGSUSED */
+static int
+iser_ioctl(dev_t devp, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ return (DDI_SUCCESS); /* stub: every command is accepted and ignored */
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser.conf Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "@(#)iser.conf 1.1 08/06/23 SMI"
+
+name="iser" parent="ib" unit-address="0";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_cm.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sunddi.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+
+#include <sys/ib/clients/iser/iser.h>
+
+extern idm_transport_ops_t iser_transport_ops;
+
+/*
+ * iser_cm.c
+ * InfiniBand Communication Manager routines for iSER
+ */
+static ibt_cm_status_t iser_ib_handle_cm_req(idm_svc_t *svc_hdl,
+ ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, void *rcmp,
+ ibt_priv_data_len_t rcmp_len);
+
+static ibt_cm_status_t iser_ib_handle_cm_rep(iser_state_t *statep,
+ ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, void *rcmp,
+ ibt_priv_data_len_t rcmp_len);
+
+static ibt_cm_status_t iser_handle_cm_conn_est(ibt_cm_event_t *evp);
+static ibt_cm_status_t iser_handle_cm_conn_closed(ibt_cm_event_t *evp);
+static ibt_cm_status_t iser_handle_cm_event_failure(ibt_cm_event_t *evp);
+
+/*
+ * iser_ib_cm_handler()
+ * Dispatches an IB CM event to the matching handler in this file and
+ * returns that handler's verdict.  The result defaults to IBT_CM_REJECT,
+ * which is what MRA, LAP, APR and unrecognized events receive since they
+ * are only logged.  cm_private is interpreted as an idm_svc_t for REQ
+ * events and as an iser_state_t for REP events.
+ */
+ibt_cm_status_t
+iser_ib_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
+ ibt_cm_return_args_t *ret_args, void *ret_priv_data,
+ ibt_priv_data_len_t ret_len_max)
+{
+ ibt_cm_status_t ret = IBT_CM_REJECT;
+
+ switch (eventp->cm_type) {
+
+ case IBT_CM_EVENT_REQ_RCV:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: IBT_CM_EVENT_REQ_RCV");
+ ret = iser_ib_handle_cm_req((idm_svc_t *)cm_private, eventp,
+ ret_args, ret_priv_data, ret_len_max);
+ break;
+
+ case IBT_CM_EVENT_REP_RCV:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: IBT_CM_EVENT_REP_RCV");
+ ret = iser_ib_handle_cm_rep((iser_state_t *)cm_private,
+ eventp, ret_args, ret_priv_data, ret_len_max);
+ break;
+
+ case IBT_CM_EVENT_CONN_EST:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: IBT_CM_EVENT_CONN_EST");
+ ret = iser_handle_cm_conn_est(eventp);
+ break;
+
+ case IBT_CM_EVENT_CONN_CLOSED:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: "
+ "IBT_CM_EVENT_CONN_CLOSED");
+ ret = iser_handle_cm_conn_closed(eventp);
+ break;
+
+ case IBT_CM_EVENT_FAILURE:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: Event failure");
+ ret = iser_handle_cm_event_failure(eventp);
+ break;
+
+ case IBT_CM_EVENT_MRA_RCV:
+ /* Not supported */
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: MRA message received");
+ break;
+
+ case IBT_CM_EVENT_LAP_RCV:
+ /* Not supported */
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: LAP message received");
+ break;
+
+ case IBT_CM_EVENT_APR_RCV:
+ /* Not supported */
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: APR message received");
+ break;
+
+ default:
+ ISER_LOG(CE_NOTE, "iser_ib_cm_handler: unknown event (0x%x)",
+ eventp->cm_type);
+ break;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static ibt_cm_status_t
+iser_ib_handle_cm_req(idm_svc_t *svc_hdl, ibt_cm_event_t *evp,
+ ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
+{
+ /*
+ * Handles a CM connection request.  Allocates an RC channel and a
+ * target-type iser_conn_t for it, links the two together, takes a hold
+ * on the iSER service and accepts the request by handing the channel
+ * handle back in rargsp.  The request is rejected when the IP data
+ * cannot be extracted or no channel can be allocated.
+ */
+ iser_private_data_t iser_priv_data;
+ ibt_ip_cm_info_t ipcm_info;
+ iser_chan_t *chan;
+ iser_conn_t *iser_conn;
+ int status;
+
+ /*
+ * CM private data brings IP information
+ * Private data received is a stream of bytes and may not be properly
+ * aligned. So, bcopy the data onto the stack before accessing it.
+ * NOTE(review): iser_priv_data is not read again in this function and
+ * the copy length ignores evp->cm_priv_data_len -- confirm whether the
+ * copy is still needed.
+ */
+ bcopy((uint8_t *)evp->cm_priv_data, &iser_priv_data,
+ sizeof (iser_private_data_t));
+
+ /* extract the CM IP info */
+ status = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data,
+ &ipcm_info);
+ if (status != IBT_SUCCESS) {
+ return (IBT_CM_REJECT);
+ }
+
+ ISER_LOG(CE_NOTE, "iser_ib_handle_cm_req: ipcm_info (0x%p): src IP "
+ "(0x%08x) src port (0x%04x) dst IP: (0x%08x)", (void *)&ipcm_info,
+ ipcm_info.src_addr.un.ip4addr, ipcm_info.src_port,
+ ipcm_info.dst_addr.un.ip4addr);
+
+ /* Allocate a channel to establish the new connection */
+ chan = iser_ib_alloc_rc_channel(&ipcm_info.dst_addr,
+ &ipcm_info.src_addr);
+ if (chan == NULL) {
+ return (IBT_CM_REJECT);
+ }
+
+ /* Set the local and remote port numbers on the channel handle */
+ chan->ic_lport = svc_hdl->is_svc_req.sr_port;
+ chan->ic_rport = ipcm_info.src_port;
+
+ /* Allocate the iser_conn_t for the IDM svc binding */
+ iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP);
+
+ /* Set up the iser_conn attributes */
+ mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL);
+ iser_conn->ic_type = ISER_CONN_TYPE_TGT;
+ iser_conn->ic_chan = chan;
+ iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED;
+
+ /* Hold a reference to the iSER service handle */
+ iser_tgt_svc_hold((iser_svc_t *)svc_hdl->is_iser_svc);
+
+ iser_conn->ic_idms = svc_hdl;
+
+ /*
+ * Now set a pointer to the iser_conn in the iser_chan for
+ * access during CM event handling
+ */
+ chan->ic_conn = iser_conn;
+
+ rargsp->cm_ret.rep.cm_channel = chan->ic_chanhdl;
+
+ return (IBT_CM_ACCEPT);
+}
+
+/* ARGSUSED */
+static ibt_cm_status_t
+iser_ib_handle_cm_rep(iser_state_t *statep, ibt_cm_event_t *evp,
+ ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
+{
+ /* pre-post work requests into the receive queue */
+ iser_ib_post_recv(evp->cm_channel);
+
+ /* It looks like the RTU need not be sent explicitly; just accept */
+ return (IBT_CM_ACCEPT);
+}
+
+static ibt_cm_status_t
+iser_handle_cm_conn_est(ibt_cm_event_t *evp)
+{
+ iser_chan_t *iser_chan;
+ iser_conn_t *iser_conn;
+ iser_svc_t *iser_svc;
+ idm_status_t status;
+ idm_conn_t *ic;
+
+ iser_chan = (iser_chan_t *)ibt_get_chan_private(evp->cm_channel);
+
+ /*
+ * An ibt_open_rc_channel() comes in as a IBT_CM_EVENT_REQ_RCV on the
+ * iSER-IB target, upon which the target sends a Response, accepting
+ * the request. This comes in as a IBT_CM_EVENT_REP_RCV on the iSER-IB
+ * initiator, which then sends an RTU. Upon getting this RTU from the
+ * iSER-IB initiator, the IBT_CM_EVENT_CONN_EST event is generated on
+ * the target. Then subsequently an IBT_CM_EVENT_CONN_EST event is
+ * generated on the initiator.
+ *
+ * Our new connection has been established on the target. If we are
+ * receiving this event on the target side, the iser_channel can be
+ * used as it is already populated. On the target side, an IDM
+ * connection is then allocated and the IDM layer is notified.
+ * If we are on the initiator we needn't do anything, since we
+ * already have the IDM linkage in place for this connection.
+ *
+ * Returns IBT_CM_NO_RESOURCE if the IDM connection cannot be created
+ * on the target, and IBT_CM_ACCEPT in every other case.
+ */
+ if (iser_chan->ic_conn->ic_type == ISER_CONN_TYPE_TGT) {
+
+ iser_conn = iser_chan->ic_conn;
+ iser_svc = (iser_svc_t *)iser_conn->ic_idms->is_iser_svc;
+
+ mutex_enter(&iser_conn->ic_lock);
+
+ status = idm_svc_conn_create(iser_conn->ic_idms,
+ IDM_TRANSPORT_TYPE_ISER, &ic);
+ if (status != IDM_STATUS_SUCCESS) {
+ /*
+ * No IDM rsrcs or something equally Bad.
+ * Return non-SUCCESS to IBCM. He'll give
+ * us a CONN_CLOSED, which we'll handle
+ * below.
+ * NOTE(review): the service hold is not dropped on
+ * this path; presumably the CONN_CLOSED teardown
+ * releases it -- confirm.
+ */
+ ISER_LOG(CE_NOTE, "iser_handle_cm_conn_est: "
+ "idm_svc_conn_create_failed");
+ mutex_exit(&iser_conn->ic_lock);
+ return (IBT_CM_NO_RESOURCE);
+ }
+
+ /* We no longer need the hold on the iSER service handle */
+ iser_tgt_svc_rele(iser_svc);
+
+ /* Hold a reference on the IDM connection handle */
+ idm_conn_hold(ic);
+
+ /* Set the transport ops and conn on the idm_conn handle */
+ ic->ic_transport_ops = &iser_transport_ops;
+ ic->ic_transport_private = (void *)iser_conn;
+ ic->ic_transport_hdrlen = ISER_HEADER_LENGTH;
+ iser_conn->ic_idmc = ic;
+
+ /*
+ * Set the local and remote addresses in the idm conn handle.
+ */
+ iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr,
+ &iser_conn->ic_chan->ic_localip, iser_chan->ic_lport);
+ iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr,
+ &iser_conn->ic_chan->ic_remoteip, iser_chan->ic_rport);
+
+ /*
+ * Kick the state machine. At CS_S3_XPT_UP the state machine
+ * will notify the client (target) about the new connection.
+ */
+ idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
+ iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED;
+ mutex_exit(&iser_conn->ic_lock);
+
+ /*
+ * Post work requests on the receive queue
+ */
+ iser_ib_post_recv(iser_chan->ic_chanhdl);
+
+ }
+
+ return (IBT_CM_ACCEPT);
+}
+
+static ibt_cm_status_t
+iser_handle_cm_conn_closed(ibt_cm_event_t *evp)
+{
+ /*
+ * Handles IBT_CM_EVENT_CONN_CLOSED.  Closes we asked for and duplicate
+ * closes are accepted and ignored.  For the other known reasons the
+ * action depends on the connection stage, examined under ic_lock:
+ * nothing, tearing down a connection that was only allocated, marking
+ * a connection CLOSED, or failing the IDM transport and moving to
+ * CLOSING.  Unknown close reasons are rejected.
+ * NOTE(review): unlike iser_handle_cm_event_failure(), chan and
+ * chan->ic_conn are not checked for NULL here -- confirm CM cannot
+ * deliver this event before they are set.
+ */
+ iser_chan_t *chan;
+
+ chan = (iser_chan_t *)ibt_get_chan_private(evp->cm_channel);
+
+ ISER_LOG(CE_NOTE, "iser_handle_cm_conn_closed: chan (0x%p) "
+ "reason (0x%x)", (void *)chan, evp->cm_event.closed);
+
+ switch (evp->cm_event.closed) {
+ case IBT_CM_CLOSED_DREP_RCVD: /* we requested a disconnect */
+ case IBT_CM_CLOSED_ALREADY: /* duplicate close */
+ /* ignore these */
+ return (IBT_CM_ACCEPT);
+
+ case IBT_CM_CLOSED_DREQ_RCVD: /* request to close the channel */
+ case IBT_CM_CLOSED_REJ_RCVD: /* reject after conn establishment */
+ case IBT_CM_CLOSED_DREQ_TIMEOUT: /* our close request timed out */
+ case IBT_CM_CLOSED_DUP: /* duplicate close request */
+ case IBT_CM_CLOSED_ABORT: /* aborted connection establishment */
+ case IBT_CM_CLOSED_STALE: /* stale / unref connection */
+ /* handle these depending upon our connection state */
+ mutex_enter(&chan->ic_conn->ic_lock);
+ switch (chan->ic_conn->ic_stage) {
+ case ISER_CONN_STAGE_UNDEFINED:
+ case ISER_CONN_STAGE_CLOSED:
+ /* do nothing, just drop the lock */
+ mutex_exit(&chan->ic_conn->ic_lock);
+ break;
+
+ case ISER_CONN_STAGE_ALLOCATED:
+ /*
+ * We blew up or were offlined during connection
+ * establishment. Teardown the iSER conn and chan
+ * handles.
+ */
+ mutex_exit(&chan->ic_conn->ic_lock);
+ iser_internal_conn_destroy(chan->ic_conn);
+ break;
+
+ case ISER_CONN_STAGE_IC_DISCONNECTED:
+ case ISER_CONN_STAGE_IC_FREED:
+ case ISER_CONN_STAGE_CLOSING:
+ /* we're down, set CLOSED */
+ chan->ic_conn->ic_stage = ISER_CONN_STAGE_CLOSED;
+ mutex_exit(&chan->ic_conn->ic_lock);
+ break;
+
+ case ISER_CONN_STAGE_IC_CONNECTED:
+ case ISER_CONN_STAGE_HELLO_SENT:
+ case ISER_CONN_STAGE_HELLO_SENT_FAIL:
+ case ISER_CONN_STAGE_HELLO_WAIT:
+ case ISER_CONN_STAGE_HELLO_RCV:
+ case ISER_CONN_STAGE_HELLO_RCV_FAIL:
+ case ISER_CONN_STAGE_HELLOREPLY_SENT:
+ case ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL:
+ case ISER_CONN_STAGE_HELLOREPLY_RCV:
+ case ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL:
+ case ISER_CONN_STAGE_LOGGED_IN:
+ /* for all other stages, fail the transport */
+ idm_conn_event(chan->ic_conn->ic_idmc,
+ CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+ chan->ic_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
+ mutex_exit(&chan->ic_conn->ic_lock);
+ break;
+
+ default:
+ mutex_exit(&chan->ic_conn->ic_lock);
+ ASSERT(0);
+
+ }
+
+ /* accept the event */
+ return (IBT_CM_ACCEPT);
+
+ default:
+ /* unknown event */
+ ISER_LOG(CE_NOTE, "iser_handle_cm_conn_closed: unknown closed "
+ "event: (0x%x)", evp->cm_event.closed);
+ return (IBT_CM_REJECT);
+ }
+}
+
+/*
+ * Handle EVENT FAILURE
+ * A failure can be reported before any channel exists, so cm_channel is
+ * tested before it is used to look up the iSER channel.  Failures with no
+ * channel, and non-stale failures of a REQ we sent, are accepted without
+ * further action.  Otherwise the action depends on the connection stage,
+ * examined under ic_lock: nothing, tearing down a connection that was only
+ * allocated, marking it CLOSED, or failing the IDM transport and moving to
+ * CLOSING.  Always returns IBT_CM_ACCEPT.
+ */
+static ibt_cm_status_t
+iser_handle_cm_event_failure(ibt_cm_event_t *evp)
+{
+	iser_chan_t	*chan = NULL;
+
+	/* Only a real channel handle can carry iSER private data */
+	if (evp->cm_channel != NULL) {
+		chan = (iser_chan_t *)ibt_get_chan_private(evp->cm_channel);
+	}
+
+	ISER_LOG(CE_NOTE, "iser_handle_cm_event_failure: chan (0x%p): "
+	    "code: %d msg: %d reason: %d", (void *)chan,
+	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
+	    evp->cm_event.failed.cf_reason);
+
+	if (chan == NULL) {
+		/* channel not established yet */
+		return (IBT_CM_ACCEPT);
+	}
+
+	if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) &&
+	    (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) {
+		/*
+		 * This end is active, just ignore, ibt_open_rc_channel()
+		 * caller will take care of cleanup.
+		 */
+		return (IBT_CM_ACCEPT);
+	}
+
+	/* handle depending upon our connection state */
+	mutex_enter(&chan->ic_conn->ic_lock);
+	switch (chan->ic_conn->ic_stage) {
+	case ISER_CONN_STAGE_UNDEFINED:
+	case ISER_CONN_STAGE_CLOSED:
+		/* do nothing, just drop the lock */
+		mutex_exit(&chan->ic_conn->ic_lock);
+		break;
+
+	case ISER_CONN_STAGE_ALLOCATED:
+		/*
+		 * We blew up or were offlined during connection
+		 * establishment. Teardown the iSER conn and chan
+		 * handles.
+		 */
+		mutex_exit(&chan->ic_conn->ic_lock);
+		iser_internal_conn_destroy(chan->ic_conn);
+		break;
+
+	case ISER_CONN_STAGE_IC_DISCONNECTED:
+	case ISER_CONN_STAGE_IC_FREED:
+	case ISER_CONN_STAGE_CLOSING:
+		/* update to CLOSED, then drop the lock */
+		chan->ic_conn->ic_stage = ISER_CONN_STAGE_CLOSED;
+		mutex_exit(&chan->ic_conn->ic_lock);
+		break;
+
+	case ISER_CONN_STAGE_IC_CONNECTED:
+	case ISER_CONN_STAGE_HELLO_SENT:
+	case ISER_CONN_STAGE_HELLO_SENT_FAIL:
+	case ISER_CONN_STAGE_HELLO_WAIT:
+	case ISER_CONN_STAGE_HELLO_RCV:
+	case ISER_CONN_STAGE_HELLO_RCV_FAIL:
+	case ISER_CONN_STAGE_HELLOREPLY_SENT:
+	case ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL:
+	case ISER_CONN_STAGE_HELLOREPLY_RCV:
+	case ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL:
+	case ISER_CONN_STAGE_LOGGED_IN:
+		/* fail the transport and move the conn to CLOSING */
+		idm_conn_event(chan->ic_conn->ic_idmc, CE_TRANSPORT_FAIL,
+		    IDM_STATUS_FAIL);
+		chan->ic_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
+		mutex_exit(&chan->ic_conn->ic_lock);
+		break;
+
+	default:
+		mutex_exit(&chan->ic_conn->ic_lock);
+		ASSERT(0);
+	}
+
+	/* accept the event */
+	return (IBT_CM_ACCEPT);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_cq.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,597 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sunddi.h>
+#include <sys/sdt.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+
+#include <sys/ib/clients/iser/iser.h>
+
+/*
+ * iser_cq.c
+ * Routines for completion queue handlers for iSER.
+ */
+static void iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg);
+int iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg);
+static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl,
+ iser_chan_t *iser_chan);
+static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl,
+ iser_chan_t *iser_chan);
+
+/*
+ * iser_ib_sendcq_handler
+ *
+ * Completion handler for a channel's send CQ. "arg" is the iser_chan_t
+ * that owns the CQ. The CQ is drained, rearmed for the next completion,
+ * and then drained once more to pick up anything that completed before
+ * the rearm took effect.
+ */
+void
+iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
+{
+	iser_chan_t	*iser_chan = (iser_chan_t *)arg;
+	ibt_status_t	status;
+
+	/*
+	 * Poll for completions only while the poll succeeds. Looping
+	 * until IBT_CQ_EMPTY alone would spin forever if ibt_poll_cq()
+	 * kept failing with some other status.
+	 */
+	do {
+		status = iser_ib_poll_send_completions(cq_hdl, iser_chan);
+	} while (status == IBT_SUCCESS);
+
+	if (status != IBT_CQ_EMPTY) {
+		/* Polling failed; the poll routine has already logged it */
+		return;
+	}
+
+	/* We've emptied the CQ, rearm it before we're done here */
+	status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+	if (status != IBT_SUCCESS) {
+		/* Unexpected error */
+		ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: "
+		    "ibt_enable_cq_notify error (%d)", status);
+		return;
+	}
+
+	/* Now, check for more completions after the rearm */
+	do {
+		status = iser_ib_poll_send_completions(cq_hdl, iser_chan);
+	} while (status == IBT_SUCCESS);
+}
+
+/*
+ * iser_ib_poll_send_completions
+ *
+ * Polls up to ISER_IB_SCQ_POLL_MAX work completions from the send CQ and
+ * processes each of them. The value returned is the ibt_poll_cq() status,
+ * which the CQ handler uses to decide whether to poll again.
+ *
+ * For every completion, the channel's ic_sq_post_count is decremented and
+ * the iser_wr_t handle is recovered from the completion's wc_id.
+ *
+ * - Failed completion: whatever is attached to the WR (msg, PDU, buffer) is
+ *   released or completed with a failure status, the WR is freed, and
+ *   CE_TRANSPORT_FAIL is raised on the IDM connection unless the iSER
+ *   connection is already disconnected, freed, closing or closed.
+ * - Successful completion: handled according to the WR's iw_type.
+ */
+static int
+iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
+{
+ ibt_wc_t wc[ISER_IB_SCQ_POLL_MAX];
+ ibt_wrid_t wrid;
+ idm_buf_t *idb = NULL;
+ idm_task_t *idt = NULL;
+ iser_wr_t *wr = NULL;
+ int i;
+ uint_t npoll = 0;
+ ibt_status_t status;
+ iser_conn_t *iser_conn;
+ idm_status_t idm_status;
+
+ iser_conn = iser_chan->ic_conn;
+
+ /*
+ * Poll ISER_IB_SCQ_POLL_MAX completions from the CQ.
+ */
+ status = ibt_poll_cq(cq_hdl, wc, ISER_IB_SCQ_POLL_MAX, &npoll);
+
+ if (status != IBT_SUCCESS) {
+ if (status != IBT_CQ_EMPTY) {
+ /* Unexpected error */
+ /*
+ * NOTE(review): the message is tagged with
+ * iser_ib_sendcq_handler although it is logged
+ * from this routine.
+ */
+ ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: ibt_poll_cq "
+ "error (%d)", status);
+ }
+ /* CQ is empty. Either way, move along... */
+ return (status);
+ }
+
+ /*
+ * Handle each of the completions we've polled
+ */
+ for (i = 0; i < npoll; i++) {
+
+ DTRACE_PROBE3(iser__send__cqe, iser_chan_t *, iser_chan,
+ ibt_wc_t *, &wc[i], ibt_wc_status_t, wc[i].wc_status);
+
+ /* Grab the wrid of the completion */
+ wrid = wc[i].wc_id;
+
+ /* Decrement this channel's SQ posted count */
+ mutex_enter(&iser_chan->ic_sq_post_lock);
+ iser_chan->ic_sq_post_count--;
+ mutex_exit(&iser_chan->ic_sq_post_lock);
+
+ /* Pull in the wr handle */
+ wr = (iser_wr_t *)(uintptr_t)wrid;
+ ASSERT(wr != NULL);
+
+ /* Set an idm_status for return to IDM */
+ idm_status = (wc[i].wc_status == IBT_WC_SUCCESS) ?
+ IDM_STATUS_SUCCESS : IDM_STATUS_FAIL;
+
+ /*
+ * A non-success status here indicates the QP went
+ * into an error state while this WR was being
+ * processed. This can also happen when the
+ * channel is closed on the remote end. Clean up
+ * the resources, then push CE_TRANSPORT_FAIL
+ * into IDM.
+ */
+ if (wc[i].wc_status != IBT_WC_SUCCESS) {
+ /*
+ * Free the resources attached to this
+ * completion.
+ */
+ if (wr->iw_msg != NULL) {
+ /* Free iser_msg handle */
+ iser_msg_free(wr->iw_msg);
+ }
+
+ if (wr->iw_pdu != NULL) {
+ /* Complete the PDU */
+ idm_pdu_complete(wr->iw_pdu, idm_status);
+ }
+
+ if (wr->iw_buf != NULL) {
+ /* Invoke buffer callback */
+ idb = wr->iw_buf;
+#ifdef DEBUG
+ bcopy(&wc[i],
+ &((iser_buf_t *)idb->idb_buf_private)->
+ buf_wc, sizeof (ibt_wc_t));
+#endif
+ idt = idb->idb_task_binding;
+ /*
+ * NOTE(review): idt_mutex is entered here but
+ * not released in this function; presumably
+ * the idm_buf_*_done() routine drops it --
+ * confirm against IDM.
+ */
+ mutex_enter(&idt->idt_mutex);
+ if (wr->iw_type == ISER_WR_RDMAW) {
+ idm_buf_tx_to_ini_done(idt, idb,
+ IDM_STATUS_FAIL);
+ } else { /* ISER_WR_RDMAR */
+ idm_buf_rx_from_ini_done(idt, idb,
+ IDM_STATUS_FAIL);
+ }
+ }
+
+ /* Free the iser wr handle */
+ iser_wr_free(wr);
+
+ /*
+ * Tell IDM that the channel has gone down,
+ * unless he already knows.
+ */
+ mutex_enter(&iser_conn->ic_lock);
+ switch (iser_conn->ic_stage) {
+ case ISER_CONN_STAGE_IC_DISCONNECTED:
+ case ISER_CONN_STAGE_IC_FREED:
+ case ISER_CONN_STAGE_CLOSING:
+ case ISER_CONN_STAGE_CLOSED:
+ break;
+
+ default:
+ idm_conn_event(iser_conn->ic_idmc,
+ CE_TRANSPORT_FAIL, idm_status);
+ iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
+ }
+ mutex_exit(&iser_conn->ic_lock);
+
+ /* Move onto the next completion */
+ continue;
+ }
+
+ /*
+ * For a success status, just invoke the PDU or
+ * buffer completion. We use our WR handle's
+ * "iw_type" here so that we can properly process
+ * because the CQE's opcode is invalid if the status
+ * is failed.
+ */
+ switch (wr->iw_type) {
+ case ISER_WR_SEND:
+ /* Free the msg handle */
+ ASSERT(wr->iw_msg != NULL);
+ iser_msg_free(wr->iw_msg);
+
+ if (wr->iw_pdu == NULL) {
+ /* This is a hello exchange message */
+ mutex_enter(&iser_conn->ic_lock);
+ if (iser_conn->ic_stage ==
+ ISER_CONN_STAGE_HELLOREPLY_SENT) {
+ /*
+ * We're on the target side,
+ * and have just successfully
+ * sent the HelloReply msg.
+ */
+ iser_conn->ic_stage =
+ ISER_CONN_STAGE_LOGGED_IN;
+ }
+ mutex_exit(&iser_conn->ic_lock);
+ } else {
+ /* This is a normal control message */
+ idm_pdu_complete(wr->iw_pdu, idm_status);
+ }
+
+ /* Free the wr handle */
+ iser_wr_free(wr);
+
+ break;
+
+ case ISER_WR_RDMAW:
+ case ISER_WR_RDMAR:
+ /*
+ * Invoke the appropriate callback;
+ * the buffer will be freed there.
+ */
+ idb = wr->iw_buf;
+#ifdef DEBUG
+ bcopy(&wc[i],
+ &((iser_buf_t *)idb->idb_buf_private)->buf_wc,
+ sizeof (ibt_wc_t));
+#endif
+ idt = idb->idb_task_binding;
+
+ /*
+ * NOTE(review): as in the failure path, idt_mutex is
+ * not released here; presumably the callback drops it.
+ */
+ mutex_enter(&idt->idt_mutex);
+ if (wr->iw_type == ISER_WR_RDMAW) {
+ idm_buf_tx_to_ini_done(idt, idb, idm_status);
+ } else {
+ idm_buf_rx_from_ini_done(idt, idb, idm_status);
+ }
+
+ /* Free the wr handle */
+ iser_wr_free(wr);
+
+ break;
+
+ default:
+ /*
+ * Unknown WR type.
+ * NOTE(review): the wr handle is not freed on this path.
+ */
+ ASSERT(0);
+ break;
+ }
+ }
+
+ return (status);
+}
+
+/*
+ * iser_ib_recvcq_handler
+ *
+ * Completion handler for a channel's receive CQ. "arg" is the iser_chan_t
+ * that owns the CQ. The CQ is drained, rearmed for the next completion,
+ * and then drained once more to pick up anything that completed before
+ * the rearm took effect.
+ */
+void
+iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
+{
+	iser_chan_t	*iser_chan = (iser_chan_t *)arg;
+	ibt_status_t	status;
+
+	/*
+	 * Poll for completions only while the poll succeeds. Looping
+	 * until IBT_CQ_EMPTY alone would spin forever if ibt_poll_cq()
+	 * kept failing with some other status.
+	 */
+	do {
+		status = iser_ib_poll_recv_completions(cq_hdl, iser_chan);
+	} while (status == IBT_SUCCESS);
+
+	if (status != IBT_CQ_EMPTY) {
+		/* Polling failed; the poll routine has already logged it */
+		return;
+	}
+
+	/* We've emptied the CQ, rearm it before we're done here */
+	status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+	if (status != IBT_SUCCESS) {
+		/* Unexpected error */
+		ISER_LOG(CE_NOTE, "iser_ib_recvcq_handler: "
+		    "ibt_enable_cq_notify error (%d)", status);
+		return;
+	}
+
+	/* Now, check for more completions after the rearm */
+	do {
+		status = iser_ib_poll_recv_completions(cq_hdl, iser_chan);
+	} while (status == IBT_SUCCESS);
+}
+
+/*
+ * iser_ib_poll_recv_completions
+ *
+ * Polls a single work completion from the receive CQ. Returns the
+ * ibt_poll_cq() status when nothing could be polled (empty CQ or error),
+ * and DDI_SUCCESS once a completion has been consumed.
+ *
+ * Consuming a completion lowers the RQ's posted level; when the level
+ * reaches the low-water mark and no refill is pending, a refill task is
+ * dispatched to the iSER taskq. A failed completion fails the transport
+ * (unless the connection is already going down) and frees the message;
+ * a good one is handed to iser_msg_handle().
+ */
+static int
+iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
+{
+	iser_qp_t	*qp = &(iser_chan->ic_qp);
+	iser_conn_t	*conn;
+	iser_msg_t	*msg;
+	ibt_wc_t	cqe;
+	boolean_t	refill;
+	int		rc;
+
+	bzero(&cqe, sizeof (ibt_wc_t));
+	rc = ibt_poll_cq(cq_hdl, &cqe, 1, NULL);
+
+	/* Nothing to do when the CQ is empty */
+	if (rc == IBT_CQ_EMPTY) {
+		return (rc);
+	}
+
+	if (rc != IBT_SUCCESS) {
+		/* Unexpected error */
+		ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
+		    "ibt_poll_cq error (%d)", rc);
+
+		mutex_enter(&qp->qp_lock);
+		qp->rq_level--;
+		mutex_exit(&qp->qp_lock);
+
+		/* Release the msg handle, provided one came back */
+		msg = (iser_msg_t *)(uintptr_t)cqe.wc_id;
+		if (msg != NULL) {
+			iser_msg_free(msg);
+		}
+		return (rc);
+	}
+
+	/* The work request id carries the iSER msg handle */
+	msg = (iser_msg_t *)(uintptr_t)cqe.wc_id;
+	ASSERT(msg != NULL);
+
+	/*
+	 * Account for the consumed receive, and decide under the QP lock
+	 * whether the RQ needs topping up. The pending flag is claimed
+	 * here so that only one refill task is in flight.
+	 */
+	mutex_enter(&qp->qp_lock);
+	qp->rq_level--;
+	refill = ((qp->rq_taskqpending == B_FALSE) &&
+	    (qp->rq_level <= qp->rq_lwm)) ? B_TRUE : B_FALSE;
+	if (refill) {
+		qp->rq_taskqpending = B_TRUE;
+	}
+	mutex_exit(&qp->qp_lock);
+
+	if (refill) {
+		rc = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
+		    (void *)iser_chan->ic_chanhdl, DDI_NOSLEEP);
+
+		if (rc != DDI_SUCCESS) {
+			ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
+			    "task dispatch failed");
+			/* The task never launched; give the flag back */
+			mutex_enter(&qp->qp_lock);
+			qp->rq_taskqpending = B_FALSE;
+			mutex_exit(&qp->qp_lock);
+		}
+	}
+
+	DTRACE_PROBE3(iser__recv__cqe, iser_chan_t *, iser_chan,
+	    ibt_wc_t *, &cqe, ibt_wc_status_t, cqe.wc_status);
+
+	if (cqe.wc_status == IBT_WC_SUCCESS) {
+		/*
+		 * A good iSER message has arrived. The handler path
+		 * takes over responsibility for freeing the iser_msg_t.
+		 */
+		iser_msg_handle(iser_chan, msg);
+		return (DDI_SUCCESS);
+	}
+
+	/*
+	 * The receive failed. Let IDM know the channel has gone down,
+	 * unless the connection is already on its way out.
+	 */
+	conn = iser_chan->ic_conn;
+	mutex_enter(&conn->ic_lock);
+	if (conn->ic_stage != ISER_CONN_STAGE_IC_DISCONNECTED &&
+	    conn->ic_stage != ISER_CONN_STAGE_IC_FREED &&
+	    conn->ic_stage != ISER_CONN_STAGE_CLOSING &&
+	    conn->ic_stage != ISER_CONN_STAGE_CLOSED) {
+		idm_conn_event(conn->ic_idmc, CE_TRANSPORT_FAIL,
+		    IDM_STATUS_FAIL);
+		conn->ic_stage = ISER_CONN_STAGE_CLOSING;
+	}
+	mutex_exit(&conn->ic_lock);
+
+	iser_msg_free(msg);
+	return (DDI_SUCCESS);
+}
+
+/*
+ * iser_msg_handle
+ *
+ * Dispatches a received iSER message on the opcode in its iSER control
+ * header:
+ *
+ *  - iSCSI control-type PDU: passed to iser_iscsihdr_handle(); on success
+ *    the msg handle is freed later by the PDU callback.
+ *  - Hello (target side) / HelloReply (initiator side): the Hello Exchange
+ *    is not supported (DEBUG kernels assert); the handling below is kept
+ *    for non-DEBUG kernels.
+ *  - anything else: protocol error.
+ *
+ * Whenever the message cannot be handled, the msg handle is freed here and
+ * CE_TRANSPORT_FAIL is raised on the IDM connection. Once the transport has
+ * been failed, processing of the message stops: the connection stage is not
+ * advanced and no reply is sent.
+ */
+static void
+iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg)
+{
+	int		opcode;
+	iser_ctrl_hdr_t	*hdr = NULL;
+	iser_conn_t	*iser_conn = chan->ic_conn;
+	int		status;
+
+	hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
+	ASSERT(hdr != NULL);
+
+	opcode = hdr->opcode;
+	if (opcode == ISER_OPCODE_CTRL_TYPE_PDU) {
+		/*
+		 * Handle an iSCSI Control PDU iSER message.
+		 * Note we'll free the msg handle in the PDU callback.
+		 */
+		status = iser_iscsihdr_handle(chan, msg);
+		if (status != DDI_SUCCESS) {
+			/*
+			 * We are unable to handle this message, and
+			 * have no way to recover from this.  Fail the
+			 * transport.
+			 */
+			ISER_LOG(CE_NOTE, "iser_msg_handle: failed "
+			    "iser_iscsihdr_handle");
+			iser_msg_free(msg);
+			idm_conn_event(iser_conn->ic_idmc,
+			    CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+		}
+	} else if (opcode == ISER_OPCODE_HELLO_MSG) {	/* at the target */
+		iser_hello_hdr_t *hello_hdr = (iser_hello_hdr_t *)hdr;
+
+		/*
+		 * We are currently not supporting Hello Exchange,
+		 * since OFED iSER does not. May be revisited.
+		 */
+		ASSERT(opcode != ISER_OPCODE_HELLO_MSG);
+
+		if (iser_conn->ic_type != ISER_CONN_TYPE_TGT) {
+			/* Only a target may receive a Hello; give up here */
+			iser_msg_free(msg);
+			idm_conn_event(iser_conn->ic_idmc,
+			    CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+			return;
+		}
+
+		ISER_LOG(CE_NOTE, "received Hello message: opcode[%d], "
+		    "maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)",
+		    hello_hdr->opcode, hello_hdr->maxver, hello_hdr->minver,
+		    ntohs(hello_hdr->iser_ird), (void *)msg);
+
+		mutex_enter(&iser_conn->ic_lock);
+
+		if (iser_conn->ic_stage != ISER_CONN_STAGE_HELLO_WAIT) {
+			/*
+			 * The target is not expecting a Hello in this
+			 * stage. Fail the transport and leave the stage
+			 * alone rather than replying on a dead connection.
+			 */
+			idm_conn_event(iser_conn->ic_idmc,
+			    CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+			mutex_exit(&iser_conn->ic_lock);
+			iser_msg_free(msg);
+			return;
+		}
+
+		iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_SENT;
+		mutex_exit(&iser_conn->ic_lock);
+
+		/* Prepare and send a HelloReply message */
+		status = iser_xfer_helloreply_msg(chan);
+		if (status != ISER_STATUS_SUCCESS) {
+
+			mutex_enter(&iser_conn->ic_lock);
+			iser_conn->ic_stage =
+			    ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL;
+			mutex_exit(&iser_conn->ic_lock);
+
+			idm_conn_event(iser_conn->ic_idmc,
+			    CE_TRANSPORT_FAIL, status);
+		}
+
+		/* Free this msg handle */
+		iser_msg_free(msg);
+
+	} else if (opcode == ISER_OPCODE_HELLOREPLY_MSG) { /* at initiator */
+		iser_helloreply_hdr_t *hello_hdr =
+		    (iser_helloreply_hdr_t *)hdr;
+
+		/*
+		 * We are currently not supporting Hello Exchange,
+		 * since OFED iSER does not. May be revisited.
+		 */
+		ASSERT(opcode != ISER_OPCODE_HELLOREPLY_MSG);
+
+		if (iser_conn->ic_type != ISER_CONN_TYPE_INI) {
+			/*
+			 * Only an initiator may receive a HelloReply.
+			 * "status" has not been assigned on this path, so
+			 * report an explicit failure status to IDM.
+			 */
+			iser_msg_free(msg);
+			idm_conn_event(iser_conn->ic_idmc,
+			    CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+			return;
+		}
+
+		ISER_LOG(CE_NOTE, "received Hello Reply message: opcode[%d], "
+		    "maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)",
+		    hello_hdr->opcode, hello_hdr->maxver, hello_hdr->curver,
+		    ntohs(hello_hdr->iser_ord), (void *)msg);
+
+		/* Free this msg handle */
+		iser_msg_free(msg);
+
+		/*
+		 * Signal the receipt of HelloReply to the waiting thread
+		 * so that the initiator can proceed to the Full Feature
+		 * Phase.
+		 */
+		mutex_enter(&iser_conn->ic_lock);
+		iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV;
+		cv_signal(&iser_conn->ic_stage_cv);
+		mutex_exit(&iser_conn->ic_lock);
+	} else {
+		/* Protocol error: free the msg handle and fail the session */
+		ISER_LOG(CE_NOTE, "iser_msg_handle: unsupported opcode (0x%x): "
+		    "terminating session on IDM handle (0x%p)", opcode,
+		    (void *) iser_conn->ic_idmc);
+
+		iser_msg_free(msg);
+		idm_conn_event(iser_conn->ic_idmc, CE_TRANSPORT_FAIL,
+		    IDM_STATUS_FAIL);
+	}
+}
+
+/*
+ * Yields the opcode field of the iSCSI header attached to a PDU handle
+ * (PDU->isp_hdr), masked with ISCSI_OPCODE_MASK.
+ */
+#define IDM_PDU_OPCODE(PDU) \
+ ((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK)
+
+/*
+ * Assemble a host-order integer from the 24-bit network-order
+ * (most significant byte first) value held in the three bytes at ptr.
+ */
+static uint32_t
+n2h24(uchar_t *ptr)
+{
+	uint32_t	val;
+
+	val = ptr[0] << 16;
+	val |= ptr[1] << 8;
+	val |= ptr[2];
+
+	return (val);
+}
+
+/*
+ * iser_rx_pdu_cb
+ *
+ * Callback installed on received control PDUs by iser_iscsihdr_handle().
+ * Releases the iSER message stashed in the PDU's transport-private field,
+ * then the PDU handle itself. The completion status is not used.
+ */
+/* ARGSUSED */
+static void
+iser_rx_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
+{
+	iser_msg_t	*rx_msg = (iser_msg_t *)pdu->isp_transport_private;
+
+	iser_msg_free(rx_msg);
+	idm_pdu_free(pdu);
+}
+
+/*
+ * iser_iscsihdr_handle
+ *
+ * Wraps a received iSER control-type message in an IDM PDU handle and
+ * passes it to idm_pdu_rx(). The PDU does not copy the message: its
+ * transport header, iSCSI header and data pointers all refer to the
+ * message buffer, and the msg handle is kept in isp_transport_private so
+ * that iser_rx_pdu_cb() can free it when the PDU completes.
+ *
+ * Returns DDI_SUCCESS once the PDU has been handed to IDM, or
+ * ISER_STATUS_FAIL if the message has no buffer or no PDU handle could be
+ * allocated. On failure the msg handle is NOT freed; that is left to the
+ * caller.
+ */
+int
+iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg)
+{
+	idm_pdu_t	*pdu;
+	uint8_t		*iser_hdrp;
+	iscsi_hdr_t	*bhs;
+
+	/* The iSER header sits at the start of the message buffer */
+	iser_hdrp = (uint8_t *)(uintptr_t)msg->msg_ds.ds_va;
+	if (iser_hdrp == NULL) {
+		ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iser_hdrp is NULL");
+		return (ISER_STATUS_FAIL);
+	}
+
+	/*
+	 * This is a non-sleeping allocation, so it may fail; the result
+	 * must be checked before it is dereferenced.
+	 */
+	pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
+	if (pdu == NULL) {
+		ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: failed to "
+		    "allocate a PDU handle");
+		return (ISER_STATUS_FAIL);
+	}
+	pdu->isp_ic = chan->ic_conn->ic_idmc;
+	ASSERT(pdu->isp_ic != NULL);
+
+	/* Set the iser_msg handle into the transport-private field */
+	pdu->isp_transport_private = (void *)msg;
+
+	/* Set up a pointer in the pdu handle to the iSER header */
+	pdu->isp_transport_hdr = (void *)iser_hdrp;
+	pdu->isp_transport_hdrlen = ISER_HEADER_LENGTH;
+
+	/*
+	 * Set up a pointer to the iSCSI header, which is directly
+	 * after the iSER header in the message.
+	 */
+	pdu->isp_hdr = (iscsi_hdr_t *)(uintptr_t)(iser_hdrp +
+	    ISER_HEADER_LENGTH);
+
+	/* Fill in the BHS */
+	bhs = pdu->isp_hdr;
+	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
+	    (bhs->hlength * sizeof (uint32_t));
+	pdu->isp_datalen = n2h24(bhs->dlength);
+	pdu->isp_callback = iser_rx_pdu_cb;
+
+	/*
+	 * If datalen > 0, then non-scsi data may be present. It follows
+	 * the iSCSI header(s) in the same message buffer, so just point
+	 * the PDU handle at it.
+	 */
+	if (pdu->isp_datalen) {
+		pdu->isp_data = ((uint8_t *)(uintptr_t)pdu->isp_hdr) +
+		    pdu->isp_hdrlen;
+	}
+
+	/* Process RX PDU */
+	idm_pdu_rx(pdu->isp_ic, pdu);
+
+	return (DDI_SUCCESS);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_ib.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,1636 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
+#include <sys/iscsi_protocol.h>
+
+#include <sys/ib/clients/iser/iser.h>
+#include <sys/ib/clients/iser/iser_idm.h>
+
+/*
+ * iser_ib.c
+ * Routines for InfiniBand transport for iSER
+ *
+ * This file contains the routines to interface with the IBT API to attach and
+ * allocate IB resources, handle async events, and post recv work requests.
+ *
+ */
+
+static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
+static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);
+
+static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
+static int iser_ib_free_hca(iser_hca_t *hca);
+static int iser_ib_update_hcaports(iser_hca_t *hca);
+static int iser_ib_init_hcas(void);
+static int iser_ib_fini_hcas(void);
+
+static iser_sbind_t *iser_ib_get_bind(
+ iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
+static int iser_ib_activate_port(
+ idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
+static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);
+
+static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
+static void iser_ib_fini_qp(iser_qp_t *qp);
+
+static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
+ ibt_cq_hdl_t *cq_hdl);
+
+static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
+ ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
+ ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);
+
+static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
+ ibt_async_event_t *event);
+static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
+ ibt_async_event_t *event);
+static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
+ ibt_async_event_t *event);
+
+/*
+ * IBTF client registration data for iSER; iser_ib_init() passes this to
+ * ibt_attach(). The third member installs iser_ib_async_handler.
+ * NOTE(review): the initializer is positional -- the remaining members are
+ * presumably the IBTI version, client class, a reserved field and the
+ * client name; confirm against the ibt_clnt_modinfo_s definition.
+ */
+static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
+ IBTI_V_CURR,
+ IBT_STORAGE_DEV,
+ iser_ib_async_handler,
+ NULL,
+ "iSER"
+};
+
+/*
+ * iser_ib_init
+ *
+ * Attaches iSER to IBTF as a client, creates the global work request
+ * kmem cache and populates the list of HCAs. Registering and binding the
+ * iSER service is not done here; the target does that later.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE after undoing whatever was set up.
+ */
+int
+iser_ib_init(void)
+{
+	int	rv;
+
+	/* Become an IBTF client */
+	rv = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
+	    &iser_state->is_ibhdl);
+	if (rv != DDI_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
+		    rv);
+		return (DDI_FAILURE);
+	}
+
+	/* One kmem cache serves all work request handles */
+	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
+	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
+	    iser_state, NULL, KM_SLEEP);
+
+	/* Build the HCA list */
+	rv = iser_ib_init_hcas();
+	if (rv == DDI_SUCCESS) {
+		/*
+		 * Done. The target registers iSER as a service with IBTF
+		 * when required, and binds it when it comes online.
+		 */
+		return (DDI_SUCCESS);
+	}
+
+	/* The HCAs failed to initialize; unwind the steps above */
+	kmem_cache_destroy(iser_state->iser_wr_cache);
+	(void) ibt_detach(iser_state->is_ibhdl);
+	iser_state->is_ibhdl = NULL;
+	ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * iser_ib_fini
+ *
+ * Tears down the HCA list and the global work request kmem cache, then
+ * detaches iSER from IBTF. Returns DDI_FAILURE, without touching anything
+ * else, if the HCAs cannot be torn down.
+ */
+int
+iser_ib_fini(void)
+{
+	int	rv;
+
+	/* IDM would have already disabled all the services */
+
+	/* The HCA list and its resources go first */
+	rv = iser_ib_fini_hcas();
+	if (rv != DDI_SUCCESS) {
+		return (DDI_FAILURE);
+	}
+
+	/* Then the global work request kmem_cache */
+	kmem_cache_destroy(iser_state->iser_wr_cache);
+
+	/* Nothing left to do unless we are still attached to IBTF */
+	if (iser_state->is_ibhdl == NULL) {
+		return (DDI_SUCCESS);
+	}
+
+	(void) ibt_detach(iser_state->is_ibhdl);
+	iser_state->is_ibhdl = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * iser_ib_register_service
+ *
+ * Registers the iSER service with IBTF under the service id held in the
+ * iSER service handle (is_svcid), with iser_ib_cm_handler as the CM
+ * callback. The resulting service handle is stored in is_srvhdl and the
+ * ibt_register_service() status is returned.
+ */
+int
+iser_ib_register_service(idm_svc_t *idm_svc)
+{
+	iser_svc_t	*svc = (iser_svc_t *)idm_svc->is_iser_svc;
+	ibt_srv_desc_t	desc;
+
+	/* Describe the service: our CM handler, no special flags */
+	bzero(&desc, sizeof (desc));
+	desc.sd_handler = iser_ib_cm_handler;
+	desc.sd_flags = IBT_SRV_NO_FLAGS;
+
+	/* Hand back whatever IBTF reports */
+	return (ibt_register_service(iser_state->is_ibhdl, &desc,
+	    svc->is_svcid, 1, &svc->is_srvhdl, NULL));
+}
+
+/*
+ * iser_ib_bind_service
+ *
+ * Walks every port of every known HCA and makes sure the given iSER
+ * service is bound on each port whose link is active. Ports that are
+ * already bound are counted but left alone; inactive ports are skipped
+ * (the async handler will attempt the bind if they come up later).
+ *
+ * Returns ISER_STATUS_SUCCESS when the service ends up available on at
+ * least one port, ISER_STATUS_FAIL otherwise.
+ */
+int
+iser_ib_bind_service(idm_svc_t *idm_svc)
+{
+	iser_hca_t	*hca;
+	ib_gid_t	gid;
+	boolean_t	bound;
+	int		nports = 0;
+	int		nbound = 0;
+	int		rv;
+	int		port;
+
+	ASSERT(idm_svc != NULL);
+	ASSERT(idm_svc->is_iser_svc != NULL);
+
+	mutex_enter(&iser_state->is_hcalist_lock);
+
+	hca = list_head(&iser_state->is_hcalist);
+	while (hca != NULL) {
+		for (port = 0; port < hca->hca_num_ports; port++) {
+			nports++;
+
+			/* Ports that are down are left for the async path */
+			if (hca->hca_port_info[port].p_linkstate !=
+			    IBT_PORT_ACTIVE) {
+				continue;
+			}
+
+			gid = hca->hca_port_info[port].p_sgid_tbl[0];
+
+			/* An existing binding counts as available */
+			bound = (iser_ib_get_bind(idm_svc->is_iser_svc,
+			    hca->hca_guid, gid) != NULL) ? B_TRUE : B_FALSE;
+
+			if (!bound) {
+				rv = iser_ib_activate_port(
+				    idm_svc, hca->hca_guid, gid);
+				if (rv == IBT_SUCCESS) {
+					bound = B_TRUE;
+				} else {
+					ISER_LOG(CE_NOTE,
+					    "iser_ib_bind_service: "
+					    "iser_ib_activate_port failure "
+					    "(0x%x)", rv);
+				}
+			}
+
+			if (bound) {
+				nbound++;
+			}
+		}
+		hca = list_next(&iser_state->is_hcalist, hca);
+	}
+
+	mutex_exit(&iser_state->is_hcalist_lock);
+
+	if (nbound == 0) {
+		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
+		return (ISER_STATUS_FAIL);
+	}
+
+	ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
+	    "(%d) of (%d) ports", nbound, nports);
+	return (ISER_STATUS_SUCCESS);
+}
+
+/*
+ * iser_ib_unbind_service
+ *
+ * Unbinds the given service from every port it is bound on: each entry on
+ * the service's bind list is unbound from IBTF, removed from the list and
+ * freed. Does nothing when no iSER service handle is attached.
+ */
+void
+iser_ib_unbind_service(idm_svc_t *idm_svc)
+{
+	iser_svc_t	*svc;
+	iser_sbind_t	*sbind;
+
+	if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
+		return;
+	}
+
+	svc = idm_svc->is_iser_svc;
+
+	/* Keep taking the first binding until the list is empty */
+	while ((sbind = list_head(&svc->is_sbindlist)) != NULL) {
+		ibt_unbind_service(svc->is_srvhdl, sbind->is_sbindhdl);
+		list_remove(&svc->is_sbindlist, sbind);
+		kmem_free(sbind, sizeof (iser_sbind_t));
+	}
+}
+
+/*
+ * iser_ib_deregister_service
+ *
+ * Deregisters the iSER service from IBTF and releases its IP service id.
+ * Does nothing when no iSER service handle is attached.
+ */
+/* ARGSUSED */
+void
+iser_ib_deregister_service(idm_svc_t *idm_svc)
+{
+	iser_svc_t	*svc;
+
+	if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
+		return;
+	}
+
+	svc = (iser_svc_t *)idm_svc->is_iser_svc;
+
+	ibt_deregister_service(iser_state->is_ibhdl, svc->is_srvhdl);
+	ibt_release_ip_sid(svc->is_svcid);
+}
+
+/*
+ * iser_ib_get_paths
+ *
+ * Looks up a single IB path between the local and the remote IP address.
+ *
+ * local_ip	source address, or NULL to leave the source unspecified
+ *		(the path attributes are zero-filled in that case;
+ *		NOTE(review): presumed to mean "any source" to IBTF --
+ *		confirm against ibt_get_ip_paths())
+ * remote_ip	destination address; must not be NULL
+ * path		filled in with the path that was found
+ * path_src_ip	filled in by ibt_get_ip_paths() with the source IP data
+ *
+ * Returns ISER_STATUS_SUCCESS, or the ibt_get_ip_paths() status on failure.
+ */
+int
+iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
+    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
+{
+	ibt_ip_path_attr_t	ipattr;
+	int			status;
+
+	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
+	ipattr.ipa_dst_ip	= remote_ip;
+	ipattr.ipa_max_paths	= 1;
+	ipattr.ipa_ndst		= 1;
+
+	/*
+	 * The caller may pass no local address (see the success message
+	 * below); only copy it when there is one to copy.
+	 */
+	if (local_ip != NULL) {
+		ipattr.ipa_src_ip = *local_ip;
+	}
+
+	bzero(path, sizeof (ibt_path_info_t));
+	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
+	    &ipattr, path, NULL, path_src_ip);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_get_paths: ibt_get_ip_paths "
+		    "failure: status (%d)", status);
+		return (status);
+	}
+
+	if (local_ip != NULL) {
+		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
+		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
+	} else {
+		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
+		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
+	}
+
+	return (ISER_STATUS_SUCCESS);
+}
+
+/*
+ * iser_ib_alloc_rc_channel
+ *
+ * Allocates an iSER channel handle and the IB resources behind it for a
+ * connection from local_ip to remote_ip:
+ *
+ *  1. look up an IB path and, from its source GID, the HCA to use;
+ *  2. size the work queues (our defaults, capped by the HCA limits) and
+ *     the completion queues (one entry larger than the WQ they serve,
+ *     again capped by the HCA limit);
+ *  3. set up the QP bookkeeping, the send and receive CQs with their
+ *     handlers, and the RC channel itself.
+ *
+ * Returns the new channel handle, or NULL on failure. Everything that was
+ * set up before the failing step is released on failure, through the
+ * common cleanup ladder at the end of the function.
+ */
+iser_chan_t *
+iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
+{
+	iser_chan_t			*chan;
+	ib_gid_t			lgid;
+	uint8_t				hca_port;	/* from path */
+	iser_hca_t			*hca;
+	ibt_path_ip_src_t		path_src_ip;
+	ibt_rc_chan_alloc_args_t	chanargs;
+	uint_t				sq_size, rq_size;
+	int				status;
+
+	chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);
+
+	mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Lookup a path to the given destination */
+	status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path,
+	    &path_src_ip);
+
+	if (status != ISER_STATUS_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)",
+		    status);
+		goto fail_chan;
+	}
+
+	/* get the local gid from the path info */
+	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
+
+	/* get the hca port from the path info */
+	hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num;
+
+	/* Lookup the hca using the gid in the path info */
+	hca = iser_ib_gid2hca(lgid);
+	if (hca == NULL) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
+		    "to lookup HCA handle");
+		goto fail_chan;
+	}
+
+	/* Set up the iSER channel handle with HCA and IP data */
+	chan->ic_hca		= hca;
+	chan->ic_localip	= path_src_ip.ip_primary;
+	chan->ic_remoteip	= *remote_ip;
+
+	/*
+	 * Determine the queue sizes, based upon the HCA query data.
+	 * For our Work Queues, we will use either our default value,
+	 * or the HCA's maximum value, whichever is smaller.
+	 */
+	sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
+	rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);
+
+	/*
+	 * For our Completion Queues, we again check the device maximum.
+	 * We want to end up with CQs that are the next size up from the
+	 * WQs they are servicing so that they have some overhead.
+	 */
+	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
+		chan->ic_sendcq_sz = sq_size + 1;
+	} else {
+		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
+		sq_size = chan->ic_sendcq_sz - 1;
+	}
+
+	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
+		chan->ic_recvcq_sz = rq_size + 1;
+	} else {
+		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
+		rq_size = chan->ic_recvcq_sz - 1;
+	}
+
+	/* Initialize the iSER channel's QP handle */
+	iser_ib_init_qp(chan, sq_size, rq_size);
+
+	/* Set up the Send Completion Queue */
+	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
+	    &chan->ic_sendcq);
+	if (status != ISER_STATUS_SUCCESS) {
+		goto fail_qp;
+	}
+	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
+	(void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);
+
+	/* Set up the Receive Completion Queue */
+	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
+	    &chan->ic_recvcq);
+	if (status != ISER_STATUS_SUCCESS) {
+		goto fail_sendcq;
+	}
+	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
+	(void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);
+
+	/* Setup the channel arguments */
+	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
+	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);
+
+	status = ibt_alloc_rc_channel(hca->hca_hdl,
+	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
+		    "ibt_alloc_rc_channel: status (%d)", status);
+		goto fail_recvcq;
+	}
+
+	/* Set the 'channel' as the client private data */
+	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);
+
+	/*
+	 * iser_ib_get_paths() treats a NULL local_ip as a legal "no local
+	 * address" request, so do not dereference it blindly here.
+	 */
+	ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: "
+	    "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
+	    (void *)chan->ic_chanhdl,
+	    (longlong_t)((local_ip != NULL) ? local_ip->un.ip4addr : 0),
+	    (longlong_t)remote_ip->un.ip4addr,
+	    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
+	    (longlong_t)hca->hca_guid, hca_port);
+
+	return (chan);
+
+	/*
+	 * Failure cleanup: each label releases one resource and falls
+	 * through to the labels for everything acquired before it.
+	 */
+fail_recvcq:
+	(void) ibt_free_cq(chan->ic_recvcq);
+fail_sendcq:
+	(void) ibt_free_cq(chan->ic_sendcq);
+fail_qp:
+	iser_ib_fini_qp(&chan->ic_qp);
+fail_chan:
+	mutex_destroy(&chan->ic_lock);
+	mutex_destroy(&chan->ic_sq_post_lock);
+	kmem_free(chan, sizeof (iser_chan_t));
+	return (NULL);
+}
+
+/*
+ * iser_ib_open_rc_channel
+ *
+ * Opens an RC connection on a channel previously set up by
+ * iser_ib_alloc_rc_channel(). The channel's ic_lock is held for the whole
+ * operation, including the blocking ibt_open_rc_channel() call.
+ *
+ * Returns IDM_STATUS_SUCCESS, or the failing IBT call's status.
+ */
+int
+iser_ib_open_rc_channel(iser_chan_t *chan)
+{
+	ibt_ip_cm_info_t	ipcm_info;
+	iser_private_data_t	iser_priv_data;
+	ibt_chan_open_args_t	ocargs;
+	ibt_rc_returns_t	ocreturns;
+	int			status;
+
+	mutex_enter(&chan->ic_lock);
+
+	/*
+	 * For connection establishment, the initiator sends a CM REQ using the
+	 * iSER RDMA-Aware Service ID. Included are the source and destination
+	 * IP addresses, and the src port.
+	 */
+	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
+	ipcm_info.src_addr = chan->ic_localip;
+	ipcm_info.dst_addr = chan->ic_remoteip;
+	ipcm_info.src_port = chan->ic_lport;
+
+	/*
+	 * The CM Private Data field defines the iSER connection parameters
+	 * such as zero based virtual address exception (ZBVAE) and Send with
+	 * invalidate Exception (SIE).
+	 *
+	 * Solaris IBT does not currently support ZBVAE or SIE.
+	 *
+	 * The structure lives on the stack and is handed to the CM as
+	 * private data, so clear all of it first: anything not explicitly
+	 * assigned below must not carry stale stack contents.
+	 */
+	bzero(&iser_priv_data, sizeof (iser_private_data_t));
+	iser_priv_data.rsvd1	= 0;
+	iser_priv_data.sie	= 1;
+	iser_priv_data.zbvae	= 1;
+
+	status = ibt_format_ip_private_data(&ipcm_info,
+	    sizeof (iser_private_data_t), &iser_priv_data);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
+		mutex_exit(&chan->ic_lock);
+		return (status);
+	}
+
+	/*
+	 * Set the SID we are attempting to connect to, based upon the
+	 * remote port number.
+	 */
+	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);
+
+	/* Set up the args for the channel open */
+	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
+	ocargs.oc_path			= &chan->ic_ibt_path;
+	ocargs.oc_cm_handler		= iser_ib_cm_handler;
+	ocargs.oc_cm_clnt_private	= iser_state;
+	ocargs.oc_rdma_ra_out		= 4;
+	ocargs.oc_rdma_ra_in		= 4;
+	ocargs.oc_path_retry_cnt	= 2;
+	ocargs.oc_path_rnr_retry_cnt	= 2;
+	ocargs.oc_priv_data_len		= sizeof (iser_private_data_t);
+	ocargs.oc_priv_data		= &iser_priv_data;
+
+	bzero(&ocreturns, sizeof (ibt_rc_returns_t));
+
+	status = ibt_open_rc_channel(chan->ic_chanhdl,
+	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);
+
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
+		mutex_exit(&chan->ic_lock);
+		return (status);
+	}
+
+	mutex_exit(&chan->ic_lock);
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_ib_close_rc_channel
+ *
+ * Close the RC channel that belongs to the given iser_chan handle.  The
+ * close request is issued in IBT_BLOCKING mode while chan->ic_lock is held;
+ * all private-data and return arguments are passed as NULL/0.  A failure is
+ * only logged, nothing is reported back to the caller.
+ */
+void
+iser_ib_close_rc_channel(iser_chan_t *chan)
+{
+	int	rc;
+
+	mutex_enter(&chan->ic_lock);
+
+	rc = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING,
+	    NULL, 0, NULL, NULL, 0);
+	if (rc != IBT_SUCCESS) {
+		/* Nothing more can be done here than to record the failure */
+		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
+		    "ibt_close_rc_channel failed: status (%d)", rc);
+	}
+
+	mutex_exit(&chan->ic_lock);
+}
+
+/*
+ * iser_ib_free_rc_channel
+ *
+ * This function tears down an RC channel's QP initialization and frees it.
+ * The channel is expected to have been closed already, so only completion
+ * polling should still be occurring. We wait for the send and receive
+ * queues to drain, then free the IBTF channel and CQ resources and our own
+ * related resources, including the iser_chan_t itself; the chan pointer
+ * must not be used after this returns.
+ *
+ * The drain loops below drop and re-take chan->ic_conn->ic_lock around each
+ * delay, so this is presumably called with that lock held -- TODO confirm
+ * against the callers.
+ */
+void
+iser_ib_free_rc_channel(iser_chan_t *chan)
+{
+ iser_qp_t *iser_qp;
+
+ iser_qp = &chan->ic_qp;
+
+ /*
+ * Ensure the SQ is empty. The connection lock is released while we
+ * sleep and re-taken before the count is checked again.
+ * NOTE(review): ic_sq_post_count is read here without taking
+ * ic_sq_post_lock -- confirm that this is intentional.
+ */
+ while (chan->ic_sq_post_count != 0) {
+ mutex_exit(&chan->ic_conn->ic_lock);
+ delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
+ mutex_enter(&chan->ic_conn->ic_lock);
+ }
+ mutex_destroy(&chan->ic_sq_post_lock);
+
+ /*
+ * Ensure the RQ is empty. rq_level is checked under qp_lock; both
+ * qp_lock and the connection lock are dropped while sleeping and are
+ * re-taken in the order connection lock, then qp_lock.
+ */
+ (void) ibt_flush_channel(chan->ic_chanhdl);
+ mutex_enter(&iser_qp->qp_lock);
+ while (iser_qp->rq_level != 0) {
+ mutex_exit(&iser_qp->qp_lock);
+ mutex_exit(&chan->ic_conn->ic_lock);
+ delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
+ mutex_enter(&chan->ic_conn->ic_lock);
+ mutex_enter(&iser_qp->qp_lock);
+ }
+
+ /* Free our QP handle */
+ mutex_exit(&iser_qp->qp_lock);
+ (void) iser_ib_fini_qp(iser_qp);
+
+ /* Free the IBT channel resources */
+ (void) ibt_free_channel(chan->ic_chanhdl);
+ chan->ic_chanhdl = NULL;
+
+ /* Free the CQs; NOTE(review): the return values are not checked */
+ ibt_free_cq(chan->ic_sendcq);
+ ibt_free_cq(chan->ic_recvcq);
+
+ /* Free the chan handle */
+ mutex_destroy(&chan->ic_lock);
+ kmem_free(chan, sizeof (iser_chan_t));
+}
+
+/*
+ * iser_ib_post_recv
+ *
+ * This function handles keeping the RQ full on a given channel.
+ * This routine will mostly be run on a taskq, and will check the
+ * current fill level of the RQ, and post as many WRs as necessary
+ * (and as the message cache can supply) to fill it again.
+ *
+ * arg is the ibt_channel_hdl_t of the channel; the iser_chan_t is looked up
+ * from the channel's private data.  Everything after that lookup runs with
+ * chan->ic_conn->ic_lock held; qp_lock is taken only around accesses to the
+ * RQ accounting fields and is always taken after the connection lock.
+ *
+ * rq_taskqpending is cleared on every exit path that neither posted a
+ * follow-up taskq job nor found the connection closing/closed, so that a
+ * later refill attempt is not blocked by a stale flag.
+ */
+void
+iser_ib_post_recv(void *arg)
+{
+	ibt_channel_hdl_t	chanhdl;
+	iser_chan_t		*chan;
+	iser_hca_t		*hca;
+	iser_msg_t		*msg;
+	ibt_recv_wr_t		*wrlist, wr[ISER_IB_RQ_POST_MAX];
+	int			rq_space, msg_ret;
+	int			total_num, npost;
+	uint_t			nposted;
+	int			status, i;
+	iser_qp_t		*iser_qp;
+	ib_gid_t		lgid;
+
+	chanhdl = (ibt_channel_hdl_t)arg;
+
+	/* Pull our iSER channel handle from the private data */
+	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
+
+	/* It is possible to run after the channel has been freed */
+	if (chan == NULL) {
+		return;
+	}
+	mutex_enter(&chan->ic_conn->ic_lock);
+
+	/* Bail out if the connection is closed; no need for more recv WRs */
+	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
+	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
+		mutex_exit(&chan->ic_conn->ic_lock);
+		return;
+	}
+
+	/* get the QP handle from the iser_chan */
+	iser_qp = &chan->ic_qp;
+
+	/* get the local gid from the path info */
+	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
+
+	/* get the hca port from the path info */
+	hca = iser_ib_gid2hca(lgid);
+	if (hca == NULL) {
+		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
+		    "HCA handle");
+		/*
+		 * Nothing was posted and nothing was redispatched; clear the
+		 * pending flag so that a later attempt is not blocked by it.
+		 */
+		mutex_enter(&iser_qp->qp_lock);
+		iser_qp->rq_taskqpending = B_FALSE;
+		mutex_exit(&iser_qp->qp_lock);
+		mutex_exit(&chan->ic_conn->ic_lock);
+		return;
+	}
+
+	/* check for space to post on the RQ */
+	mutex_enter(&iser_qp->qp_lock);
+	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
+	if (rq_space <= 0) {
+		/*
+		 * The RQ is full, clear the pending flag and return.  A
+		 * negative value is not expected, but must never be handed
+		 * to the message cache as a request count.
+		 */
+		iser_qp->rq_taskqpending = B_FALSE;
+		mutex_exit(&iser_qp->qp_lock);
+		mutex_exit(&chan->ic_conn->ic_lock);
+		return;
+	}
+
+	/* Keep track of the lowest value for rq_min_post_level */
+	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
+		iser_qp->rq_min_post_level = iser_qp->rq_level;
+
+	mutex_exit(&iser_qp->qp_lock);
+
+	/* we've room to post, so pull from the msg cache */
+	msg = iser_msg_get(hca, rq_space, &msg_ret);
+	if (msg == NULL) {
+		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
+		    "available in msg cache currently");
+		/*
+		 * There are no messages on the cache. Wait a half-
+		 * second, then try again.  Note that the connection lock
+		 * stays held across the delay.
+		 */
+		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
+		status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
+		    (void *)chanhdl, DDI_NOSLEEP);
+		if (status != DDI_SUCCESS) {
+			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
+			    "redispatch routine");
+			/* Failed to dispatch, clear pending flag */
+			mutex_enter(&iser_qp->qp_lock);
+			iser_qp->rq_taskqpending = B_FALSE;
+			mutex_exit(&iser_qp->qp_lock);
+		}
+		mutex_exit(&chan->ic_conn->ic_lock);
+		return;
+	}
+
+	if (msg_ret != rq_space) {
+		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
+		    "messages not allocated: requested (%d) allocated (%d)",
+		    rq_space, msg_ret);
+		/* We got some, but not all, of our requested depth */
+		rq_space = msg_ret;
+	}
+
+	/*
+	 * Now, walk through the allocated WRs and post them,
+	 * ISER_IB_RQ_POST_MAX (or less) at a time.
+	 */
+	wrlist = &wr[0];
+	total_num = rq_space;
+
+	while (total_num) {
+		/* determine the number to post on this iteration */
+		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
+		    ISER_IB_RQ_POST_MAX : total_num;
+
+		/* build a list of WRs from the msg list */
+		for (i = 0; i < npost; i++) {
+			wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
+			wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
+			wrlist[i].wr_sgl = &msg->msg_ds;
+			msg = msg->nextp;
+		}
+
+		/* post the list to the RQ */
+		nposted = 0;
+		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
+		if ((status != IBT_SUCCESS) || (nposted != (uint_t)npost)) {
+			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
+			    "failed: requested (%d) posted (%d) status (%d)",
+			    npost, nposted, status);
+			/*
+			 * NOTE(review): the message handles that were pulled
+			 * from the cache but not posted are not referenced
+			 * again after this point and appear to be leaked --
+			 * confirm and return them to the cache if so.
+			 */
+			total_num -= nposted;
+			break;
+		}
+
+		/* decrement total number to post by the number posted */
+		total_num -= nposted;
+	}
+
+	mutex_enter(&iser_qp->qp_lock);
+	if (total_num != 0) {
+		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
+		    "failed to post (%d) WRs", total_num);
+	}
+	/* Account only for the WRs that actually made it onto the RQ */
+	iser_qp->rq_level += rq_space - total_num;
+
+	/*
+	 * Now that we've filled the RQ, check that all of the recv WRs
+	 * haven't just been immediately consumed. If so, taskqpending is
+	 * still B_TRUE, so we need to fire off a taskq thread to post
+	 * more WRs.
+	 */
+	if (iser_qp->rq_level == 0) {
+		mutex_exit(&iser_qp->qp_lock);
+		status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
+		    (void *)chanhdl, DDI_NOSLEEP);
+		if (status != DDI_SUCCESS) {
+			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
+			    "dispatch followup routine");
+			/* Failed to dispatch, clear pending flag */
+			mutex_enter(&iser_qp->qp_lock);
+			iser_qp->rq_taskqpending = B_FALSE;
+			mutex_exit(&iser_qp->qp_lock);
+		}
+	} else {
+		/*
+		 * We're done, we've filled the RQ. Clear the taskq
+		 * flag so that we can run again.
+		 */
+		iser_qp->rq_taskqpending = B_FALSE;
+		mutex_exit(&iser_qp->qp_lock);
+	}
+
+	mutex_exit(&chan->ic_conn->ic_lock);
+}
+
+/*
+ * iser_ib_handle_portup_event()
+ * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
+ *
+ * To facilitate a seamless bringover of the port and configure the CM service
+ * for inbound iSER service requests on this newly active port, the existing
+ * IDM services will be checked for iSER support.
+ * If an iSER service was already created, then this service will simply be
+ * bound to the gid of the newly active port. If on the other hand, the CM
+ * service did not exist, i.e. only socket communication, then a new CM
+ * service will be first registered with the saved service parameters and
+ * then bound to the newly active port.
+ *
+ * If the HCA is not yet known it is allocated and added to the HCA list
+ * first; if that allocation fails, or if the event carries a port number
+ * outside the range recorded for the HCA, the event is logged and dropped.
+ */
+/* ARGSUSED */
+static void
+iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
+{
+	iser_hca_t	*hca;
+	ib_gid_t	gid;
+	idm_svc_t	*idm_svc;
+	int		status;
+
+	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
+	    (longlong_t)event->ev_hca_guid, event->ev_port);
+
+	/*
+	 * Query all ports on the HCA and update the port information
+	 * maintained in the iser_hca_t structure
+	 */
+	hca = iser_ib_guid2hca(event->ev_hca_guid);
+	if (hca == NULL) {
+
+		/* HCA is just made available, first port on that HCA */
+		hca = iser_ib_alloc_hca(event->ev_hca_guid);
+		if (hca == NULL) {
+			/* The HCA could not be opened; nothing to bind to */
+			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: "
+			    "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)",
+			    (longlong_t)event->ev_hca_guid, event->ev_port);
+			return;
+		}
+
+		mutex_enter(&iser_state->is_hcalist_lock);
+		list_insert_tail(&iser_state->is_hcalist, hca);
+		iser_state->is_num_hcas++;
+		mutex_exit(&iser_state->is_hcalist_lock);
+
+	} else {
+
+		status = iser_ib_update_hcaports(hca);
+
+		if (status != IBT_SUCCESS) {
+			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
+			    "status(0x%x): iser_ib_update_hcaports failed: "
+			    "HCA(0x%llx) port(%d)", status,
+			    (longlong_t)event->ev_hca_guid, event->ev_port);
+			return;
+		}
+	}
+
+	/* ev_port is 1-based; never index outside the port info array */
+	if ((event->ev_port == 0) ||
+	    (event->ev_port > hca->hca_num_ports)) {
+		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: "
+		    "invalid port: HCA(0x%llx) port(%d)",
+		    (longlong_t)event->ev_hca_guid, event->ev_port);
+		return;
+	}
+
+	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
+
+	/*
+	 * Iterate through the global list of IDM target services
+	 * and check for existing iSER CM service.
+	 */
+	mutex_enter(&idm.idm_global_mutex);
+	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
+	    idm_svc != NULL;
+	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {
+
+
+		if (idm_svc->is_iser_svc == NULL) {
+
+			/* Establish a new CM service for iSER requests */
+			status = iser_tgt_svc_create(
+			    &idm_svc->is_svc_req, idm_svc);
+
+			if (status != IBT_SUCCESS) {
+				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
+				    "status(0x%x): iser_tgt_svc_create failed: "
+				    "HCA(0x%llx) port(%d)", status,
+				    (longlong_t)event->ev_hca_guid,
+				    event->ev_port);
+
+				continue;
+			}
+		}
+
+		/* Bind the (existing or new) CM service to the new port */
+		status = iser_ib_activate_port(
+		    idm_svc, event->ev_hca_guid, gid);
+		if (status != IBT_SUCCESS) {
+
+			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
+			    "status(0x%x): Bind service on port "
+			    "(%llx:%llx) failed",
+			    status, (longlong_t)gid.gid_prefix,
+			    (longlong_t)gid.gid_guid);
+
+			continue;
+		}
+		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
+		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
+		    event->ev_port);
+	}
+	mutex_exit(&idm.idm_global_mutex);
+
+	/* Logged once the walk is complete, even if some binds failed */
+	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
+	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
+	    event->ev_port);
+}
+
+/*
+ * iser_ib_handle_portdown_event()
+ * This handles the IBT_ERROR_PORT_DOWN unaffiliated asynchronous error.
+ *
+ * Unconfigure the CM service on the deactivated port and teardown the
+ * connections that are using the CM service.
+ *
+ * Events for an HCA that is not on our HCA list, or that carry a port
+ * number outside the range recorded for the HCA, are logged and dropped.
+ */
+/* ARGSUSED */
+static void
+iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
+{
+	iser_hca_t	*hca;
+	ib_gid_t	gid;
+	int		status;
+
+	/*
+	 * Query all ports on the HCA and update the port information
+	 * maintained in the iser_hca_t structure
+	 */
+	hca = iser_ib_guid2hca(event->ev_hca_guid);
+	if (hca == NULL) {
+		/* Checked at run time; an ASSERT vanishes on non-DEBUG */
+		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event: "
+		    "unknown HCA(0x%llx) port(%d)",
+		    (longlong_t)event->ev_hca_guid, event->ev_port);
+		return;
+	}
+
+	status = iser_ib_update_hcaports(hca);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
+		    "iser_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
+		    status, (longlong_t)event->ev_hca_guid, event->ev_port);
+		return;
+	}
+
+	/* ev_port is 1-based; never index outside the port info array */
+	if ((event->ev_port == 0) ||
+	    (event->ev_port > hca->hca_num_ports)) {
+		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event: "
+		    "invalid port: HCA(0x%llx) port(%d)",
+		    (longlong_t)event->ev_hca_guid, event->ev_port);
+		return;
+	}
+
+	/* get the gid of the port that went down */
+	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
+	iser_ib_deactivate_port(event->ev_hca_guid, gid);
+
+	ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
+	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
+	    event->ev_port);
+}
+
+/*
+ * iser_ib_handle_hca_detach_event()
+ * Quiesce all activity bound for the port, teardown the connection, unbind
+ * iSER services on all ports and release the HCA handle.
+ *
+ * An event for an HCA that is not on our HCA list is logged and dropped.
+ * If the HCA cannot be freed it is put back on the list; there is no way
+ * to report that failure to IBT.
+ */
+/* ARGSUSED */
+static void
+iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
+{
+	iser_hca_t	*nexthca, *hca, *last;
+	int		i, status;
+
+	ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
+	    (longlong_t)event->ev_hca_guid);
+
+	hca = iser_ib_guid2hca(event->ev_hca_guid);
+	if (hca == NULL) {
+		ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: "
+		    "unknown HCA(0x%llx)", (longlong_t)event->ev_hca_guid);
+		return;
+	}
+
+	for (i = 0; i < hca->hca_num_ports; i++) {
+		iser_ib_deactivate_port(hca->hca_guid,
+		    hca->hca_port_info[i].p_sgid_tbl[0]);
+	}
+
+	/*
+	 * Update the HCA list maintained in the iser_state. Free the
+	 * resources allocated to the HCA, i.e. caches, protection domain
+	 */
+	mutex_enter(&iser_state->is_hcalist_lock);
+
+	/*
+	 * Remember the current tail and stop after it: an HCA that could not
+	 * be freed is re-inserted at the tail and must not be visited again
+	 * during this walk.
+	 */
+	last = list_tail(&iser_state->is_hcalist);
+
+	for (hca = list_head(&iser_state->is_hcalist);
+	    hca != NULL;
+	    hca = nexthca) {
+
+		/* Pick the successor before hca is possibly removed */
+		nexthca = (hca == last) ? NULL :
+		    list_next(&iser_state->is_hcalist, hca);
+
+		if (hca->hca_guid != event->ev_hca_guid)
+			continue;
+
+		list_remove(&iser_state->is_hcalist, hca);
+		iser_state->is_num_hcas--;
+
+		status = iser_ib_free_hca(hca);
+		if (status != DDI_SUCCESS) {
+			ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
+			    "Failed to free hca(%p)", (void *)hca);
+			list_insert_tail(&iser_state->is_hcalist, hca);
+			iser_state->is_num_hcas++;
+		}
+		/* No way to return status to IBT if this fails */
+	}
+	mutex_exit(&iser_state->is_hcalist_lock);
+
+}
+
+/*
+ * iser_ib_async_handler
+ * The IBT asynchronous event handler that is handed to the framework via
+ * the ibt_attach() routine.  The events are treated as follows:
+ *	IBT_EVENT_PORT_UP	dispatched to the port-up handler
+ *	IBT_ERROR_PORT_DOWN	dispatched to the port-down handler
+ *	IBT_HCA_DETACH_EVENT	dispatched to the HCA detach handler
+ *	IBT_HCA_ATTACH_EVENT	ignored, see below
+ * Every other event code is ignored as well.
+ */
+/* ARGSUSED */
+void
+iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
+    ibt_async_event_t *event)
+{
+	if (code == IBT_EVENT_PORT_UP) {
+		iser_ib_handle_portup_event(hdl, event);
+	} else if (code == IBT_ERROR_PORT_DOWN) {
+		iser_ib_handle_portdown_event(hdl, event);
+	} else if (code == IBT_HCA_DETACH_EVENT) {
+		iser_ib_handle_hca_detach_event(hdl, event);
+	}
+
+	/*
+	 * IBT_HCA_ATTACH_EVENT needs no work of its own: a new HCA device
+	 * announces its ports through IBT_EVENT_PORT_UP events, and those
+	 * are handled above.
+	 */
+}
+
+/*
+ * iser_ib_init_hcas
+ *
+ * Open every HCA reported by IBTF, gather its state and queue an iser_hca_t
+ * for it on the HCA list kept in the iSER soft state.  Returns DDI_SUCCESS
+ * when the list ends up non-empty.  Returns DDI_FAILURE when IBTF reports
+ * no HCAs, or when any HCA cannot be set up, in which case whatever was
+ * already queued is torn down again.
+ */
+static int
+iser_ib_init_hcas(void)
+{
+	ib_guid_t	*guids;
+	iser_hca_t	*new_hca;
+	int		nhcas;
+	int		idx;
+
+	/*
+	 * Ask IBTF for the HCA GUIDs.  An empty answer is not expected, but
+	 * can occur if all HCAs detach prior to initialization.
+	 */
+	nhcas = ibt_get_hca_list(&guids);
+	if (nhcas == 0)
+		return (DDI_FAILURE);
+
+	/* Set up the lock first, then the list it protects */
+	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);
+	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
+	    offsetof(iser_hca_t, hca_node));
+
+	for (idx = 0; idx < nhcas; idx++) {
+
+		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
+		    "(0x%llx)", (longlong_t)guids[idx]);
+
+		new_hca = iser_ib_alloc_hca(guids[idx]);
+		if (new_hca != NULL) {
+			/* Queue the handle and move on to the next GUID */
+			mutex_enter(&iser_state->is_hcalist_lock);
+			list_insert_tail(&iser_state->is_hcalist, new_hca);
+			iser_state->is_num_hcas++;
+			mutex_exit(&iser_state->is_hcalist_lock);
+			continue;
+		}
+
+		/* Not expected: undo what has been set up and give up */
+		(void) iser_ib_fini_hcas();
+		(void) ibt_free_hca_list(guids, nhcas);
+		return (DDI_FAILURE);
+	}
+
+	/* The GUID array from IBTF is no longer needed */
+	(void) ibt_free_hca_list(guids, nhcas);
+
+	/* At least one HCA must have made it onto the list */
+	mutex_enter(&iser_state->is_hcalist_lock);
+	if (!list_is_empty(&iser_state->is_hcalist)) {
+		mutex_exit(&iser_state->is_hcalist_lock);
+		return (DDI_SUCCESS);
+	}
+
+	ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
+	    "any HCAs");
+
+	mutex_exit(&iser_state->is_hcalist_lock);
+	(void) iser_ib_fini_hcas();
+	return (DDI_FAILURE);
+}
+
+/*
+ * iser_ib_fini_hcas
+ *
+ * Teardown the iSER HCA list initialized above.
+ *
+ * Every HCA on the list is removed and freed.  If an HCA cannot be freed it
+ * is put back on the list and DDI_FAILURE is returned; the list and its
+ * lock are left intact in that case.  On success the list and the lock are
+ * destroyed and DDI_SUCCESS is returned.
+ */
+static int
+iser_ib_fini_hcas(void)
+{
+	iser_hca_t	*nexthca, *hca;
+	int		status;
+
+	mutex_enter(&iser_state->is_hcalist_lock);
+	for (hca = list_head(&iser_state->is_hcalist);
+	    hca != NULL;
+	    hca = nexthca) {
+
+		/* Pick the successor before hca is removed and freed */
+		nexthca = list_next(&iser_state->is_hcalist, hca);
+
+		list_remove(&iser_state->is_hcalist, hca);
+
+		/* iser_ib_free_hca() reports DDI_SUCCESS or DDI_FAILURE */
+		status = iser_ib_free_hca(hca);
+		if (status != DDI_SUCCESS) {
+			ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
+			    "HCA during fini");
+			list_insert_tail(&iser_state->is_hcalist, hca);
+			/* Do not return with the list lock still held */
+			mutex_exit(&iser_state->is_hcalist_lock);
+			return (DDI_FAILURE);
+		}
+
+		iser_state->is_num_hcas--;
+
+	}
+	mutex_exit(&iser_state->is_hcalist_lock);
+	list_destroy(&iser_state->is_hcalist);
+	mutex_destroy(&iser_state->is_hcalist_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * iser_ib_alloc_hca
+ *
+ * Build an iser_hca_t for the HCA with the given GUID: open the device,
+ * query its attributes and ports, allocate a single protection domain and
+ * initialize the per-HCA caches.  Returns the new handle, or NULL if any of
+ * the IBTF calls fails; in that case everything acquired so far has been
+ * released again.
+ */
+static iser_hca_t *
+iser_ib_alloc_hca(ib_guid_t guid)
+{
+	iser_hca_t	*hca;
+	int		status;
+
+	/* KM_SLEEP allocation of a zeroed handle */
+	hca = kmem_zalloc(sizeof (*hca), KM_SLEEP);
+
+	/* Step 1: open the device */
+	status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
+		    " guid (0x%llx) status (0x%x)", (longlong_t)guid, status);
+		goto fail_free;
+	}
+
+	hca->hca_guid = guid;
+	hca->hca_clnt_hdl = iser_state->is_ibhdl;
+
+	/* Step 2: fetch the HCA attributes */
+	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
+		    "failure: guid (0x%llx) status (0x%x)",
+		    (longlong_t)guid, status);
+		goto fail_close;
+	}
+
+	/* Step 3: fetch the information for all ports (port argument 0) */
+	status = ibt_query_hca_ports(hca->hca_hdl, 0,
+	    &hca->hca_port_info, &hca->hca_num_ports,
+	    &hca->hca_port_info_sz);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
+		    "ibt_query_hca_ports failure: guid (0x%llx) "
+		    "status (0x%x)", (longlong_t)guid, status);
+		goto fail_close;
+	}
+
+	/* Step 4: one protection domain per HCA */
+	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS,
+	    &hca->hca_pdhdl);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
+		    "failure: guid (0x%llx) status (0x%x)",
+		    (longlong_t)guid, status);
+		/* The port information exists by now and must go as well */
+		(void) ibt_close_hca(hca->hca_hdl);
+		ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
+		goto fail_free;
+	}
+
+	/* Step 5: message and data MR caches for this HCA */
+	iser_init_hca_caches(hca);
+
+	return (hca);
+
+fail_close:
+	(void) ibt_close_hca(hca->hca_hdl);
+fail_free:
+	kmem_free(hca, sizeof (iser_hca_t));
+	return (NULL);
+}
+
+/*
+ * iser_ib_free_hca
+ *
+ * Release everything held for the given HCA: the per-HCA caches, the
+ * protection domain, the open HCA handle, the port information and finally
+ * the iser_hca_t itself.  Returns DDI_SUCCESS when the HCA has been freed.
+ *
+ * Returns DDI_FAILURE when the PD cannot be freed or the HCA cannot be
+ * closed.  In that case an attempt is made to put the handle back into its
+ * previous state (PD re-allocated if it had been freed, caches
+ * re-initialized).  If the PD cannot be re-allocated the handle is marked
+ * hca_failed, and every later call for it returns DDI_FAILURE right away.
+ */
+static int
+iser_ib_free_hca(iser_hca_t *hca)
+{
+	int			status;
+	ibt_hca_portinfo_t	*hca_port_info;
+	uint_t			hca_port_info_sz;
+
+	ASSERT(hca != NULL);
+	if (hca->hca_failed)
+		return (DDI_FAILURE);
+
+	hca_port_info = hca->hca_port_info;
+	hca_port_info_sz = hca->hca_port_info_sz;
+
+	/*
+	 * Free the memory regions before freeing
+	 * the associated protection domain
+	 */
+	iser_fini_hca_caches(hca);
+
+	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
+		    "status=0x%x", status);
+		/* The PD is still there, only the caches need rebuilding */
+		goto out_caches;
+	}
+
+	status = ibt_close_hca(hca->hca_hdl);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to close HCA "
+		    "status=0x%x", status);
+		/* The PD is gone by now and has to be re-allocated */
+		goto out_pd;
+	}
+
+	ibt_free_portinfo(hca_port_info, hca_port_info_sz);
+
+	kmem_free(hca, sizeof (iser_hca_t));
+	return (DDI_SUCCESS);
+
+	/*
+	 * We only managed to partially tear down the HCA, try to put it back
+	 * like it was before returning.
+	 */
+out_pd:
+	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
+	if (status != IBT_SUCCESS) {
+		hca->hca_failed = B_TRUE;
+		/* Report error and exit */
+		ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
+		    "status=0x%x", status);
+		return (DDI_FAILURE);
+	}
+
+out_caches:
+	iser_init_hca_caches(hca);
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * iser_ib_update_hcaports
+ *
+ * Re-query the information for all ports of the given HCA and replace the
+ * copy kept in the iser_hca_t (port array, its size and the port count).
+ * The previous port array is freed.  Returns IBT_SUCCESS, or the status of
+ * ibt_query_hca_ports() on failure, in which case the handle is unchanged.
+ *
+ * NOTE(review): the swap is done without taking is_hcalist_lock, while
+ * iser_ib_gid2hca() reads hca_port_info under that lock -- confirm that
+ * the two cannot run concurrently.
+ */
+static int
+iser_ib_update_hcaports(iser_hca_t *hca)
+{
+	ibt_hca_portinfo_t	*pinfop, *oldpinfop;
+	uint_t			size, oldsize, nport;
+	int			status;
+
+	ASSERT(hca != NULL);
+
+	/* Port argument 0 requests the information for all ports */
+	status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
+	if (status != IBT_SUCCESS) {
+		ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
+		return (status);
+	}
+
+	oldpinfop = hca->hca_port_info;
+	oldsize = hca->hca_port_info_sz;
+	hca->hca_port_info = pinfop;
+	hca->hca_port_info_sz = size;
+	/* Keep the count in step with the array it describes */
+	hca->hca_num_ports = nport;
+
+	(void) ibt_free_portinfo(oldpinfop, oldsize);
+
+	return (IBT_SUCCESS);
+}
+
+/*
+ * iser_ib_gid2hca
+ * Look up the HCA that owns the given gid.  The HCA list is walked under
+ * is_hcalist_lock and the gid is compared against the first source GID
+ * table entry of every port.  Returns the first HCA with a matching port,
+ * or NULL if there is none; the lock is not held on return.
+ */
+iser_hca_t *
+iser_ib_gid2hca(ib_gid_t gid)
+{
+	iser_hca_t	*cur;
+	iser_hca_t	*found = NULL;
+	ib_gid_t	*pgid;
+	int		port;
+
+	mutex_enter(&iser_state->is_hcalist_lock);
+
+	cur = list_head(&iser_state->is_hcalist);
+	while (cur != NULL) {
+		for (port = 0; port < cur->hca_num_ports; port++) {
+			pgid = &cur->hca_port_info[port].p_sgid_tbl[0];
+
+			/* Both halves of the gid have to agree */
+			if ((pgid->gid_prefix == gid.gid_prefix) &&
+			    (pgid->gid_guid == gid.gid_guid)) {
+				found = cur;
+				goto done;
+			}
+		}
+		cur = list_next(&iser_state->is_hcalist, cur);
+	}
+
+done:
+	mutex_exit(&iser_state->is_hcalist_lock);
+	return (found);
+}
+
+/*
+ * iser_ib_guid2hca
+ * Look up the HCA with the given HCA guid.  The HCA list is walked under
+ * is_hcalist_lock; the first entry whose hca_guid matches is returned, or
+ * NULL if there is none.  The lock is not held on return.
+ */
+iser_hca_t *
+iser_ib_guid2hca(ib_guid_t guid)
+{
+	iser_hca_t	*cur;
+
+	mutex_enter(&iser_state->is_hcalist_lock);
+
+	/* Advance until a match is found or the list is exhausted */
+	cur = list_head(&iser_state->is_hcalist);
+	while ((cur != NULL) && (cur->hca_guid != guid)) {
+		cur = list_next(&iser_state->is_hcalist, cur);
+	}
+
+	mutex_exit(&iser_state->is_hcalist_lock);
+
+	/* cur is the match, or NULL when the walk ran off the end */
+	return (cur);
+}
+
+/*
+ * iser_ib_conv_sockaddr2ibtaddr
+ * Fill in an IBT IP address from a socket address.  A NULL socket address
+ * yields family AF_UNSPEC with a zero IPv4 address.  AF_INET and AF_INET6
+ * addresses are copied along with their family.  Any other family yields
+ * AF_UNSPEC, with the address part of ibt_addr left untouched.
+ */
+void
+iser_ib_conv_sockaddr2ibtaddr(idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
+{
+	/* No address given at all */
+	if (saddr == NULL) {
+		ibt_addr->family = AF_UNSPEC;
+		ibt_addr->un.ip4addr = 0;
+		return;
+	}
+
+	if (saddr->sin.sa_family == AF_INET) {
+		/* IPv4 */
+		ibt_addr->family = saddr->sin4.sin_family;
+		ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr;
+	} else if (saddr->sin.sa_family == AF_INET6) {
+		/* IPv6 */
+		ibt_addr->family = saddr->sin6.sin6_family;
+		ibt_addr->un.ip6addr = saddr->sin6.sin6_addr;
+	} else {
+		/* Anything else is passed on as unspecified */
+		ibt_addr->family = AF_UNSPEC;
+	}
+}
+
+/*
+ * iser_ib_conv_ibtaddr2sockaddr
+ * This function converts an IBT ip address handle to a sockaddr
+ *
+ * The result is built field by field in the caller's sockaddr_storage: a
+ * sockaddr_in for AF_INET and AF_UNSPEC, a sockaddr_in6 for AF_INET6.  The
+ * written sockaddr is zeroed first, so fields that have no counterpart in
+ * the IBT address are 0.  ibt_addr itself is only read.  For any other
+ * family a note is logged and ss is left untouched.
+ *
+ * The IBT address is deliberately not cast to a sockaddr type: the two are
+ * different structures, so a cast would depend on their layouts lining up
+ * and would write the port into the caller's ibt_addr.
+ */
+void
+iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
+    ibt_ip_addr_t *ibt_addr, in_port_t port)
+{
+	struct sockaddr_in	*sin;
+	struct sockaddr_in6	*sin6;
+
+	switch (ibt_addr->family) {
+	case AF_INET:
+	case AF_UNSPEC:
+
+		sin = (struct sockaddr_in *)ss;
+		bzero(sin, sizeof (struct sockaddr_in));
+		/* AF_UNSPEC is passed through as the family */
+		sin->sin_family = ibt_addr->family;
+		/*
+		 * NOTE(review): the byte swap is kept exactly as before;
+		 * confirm which byte order callers pass the port in.
+		 */
+		sin->sin_port = ntohs(port);
+		sin->sin_addr.s_addr = ibt_addr->un.ip4addr;
+		break;
+
+	case AF_INET6:
+
+		sin6 = (struct sockaddr_in6 *)ss;
+		bzero(sin6, sizeof (struct sockaddr_in6));
+		sin6->sin6_family = ibt_addr->family;
+		sin6->sin6_port = ntohs(port);
+		sin6->sin6_addr = ibt_addr->un.ip6addr;
+		break;
+
+	default:
+		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
+		    "unknown family type: 0x%x", ibt_addr->family);
+		break;
+	}
+}
+
+/*
+ * iser_ib_setup_cq
+ * Allocate a Completion Queue of the requested size on the given HCA and
+ * hand its handle back through cq_hdl.  Returns ISER_STATUS_SUCCESS, or the
+ * status of ibt_alloc_cq() if the allocation fails.
+ */
+static int
+iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
+{
+	ibt_cq_attr_t	attr;
+	int		rc;
+
+	/* Requested size, scheduling hint 0, no flags */
+	attr.cq_flags = IBT_CQ_NO_FLAGS;
+	attr.cq_sched = 0;
+	attr.cq_size = cq_size;
+
+	/* The real size is not needed, hence the NULL last argument */
+	rc = ibt_alloc_cq(hca_hdl, &attr, cq_hdl, NULL);
+	if (rc == IBT_SUCCESS)
+		return (ISER_STATUS_SUCCESS);
+
+	ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
+	    rc);
+	return (rc);
+}
+
+/*
+ * iser_ib_setup_chanargs
+ *
+ * Fill in the RC channel allocation arguments for a new channel.  cargs is
+ * zeroed first, so every field not set below is 0.
+ */
+static void
+iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
+    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
+    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
+{
+	bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));
+
+	/* Where the channel lives: local HCA port and protection domain */
+	cargs->rc_hca_port_num = hca_port;
+	cargs->rc_pd = hca_pdhdl;
+
+	/* Completion queues for the send and the receive side */
+	cargs->rc_scq = scq_hdl;
+	cargs->rc_rcq = rcq_hdl;
+
+	/* The channel does not use a shared receive queue */
+	cargs->rc_srq = NULL;
+
+	/*
+	 * Depth of the send and receive queues, and the largest scatter
+	 * gather list a work request posted to either queue may carry.
+	 */
+	cargs->rc_sizes.cs_sq = sq_size;
+	cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE;
+	cargs->rc_sizes.cs_rq = rq_size;
+	cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE;
+
+	/* Every send work request is to generate a completion */
+	cargs->rc_flags = IBT_ALL_SIGNALED;
+
+	/* RDMA read and RDMA write are enabled on the channel end points */
+	cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
+}
+
+/*
+ * iser_ib_init_qp
+ * Initialize the QP handle embedded in the channel: its lock, the recorded
+ * queue sizes, the RQ fill-level bookkeeping and the taskq pending flag.
+ */
+void
+iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
+{
+	iser_qp_t	*qp = &chan->ic_qp;
+
+	/* The lock comes first, it protects the fields below */
+	mutex_init(&qp->qp_lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Remember how large the queues are */
+	qp->sq_size = sq_size;
+	qp->rq_size = rq_size;
+
+	/*
+	 * RQ monitoring: nothing is posted yet, the RQ may be filled up to
+	 * its full size, and the low water mark is ISER_IB_RQ_LWM_PCT
+	 * percent of the receive CQ size.
+	 */
+	qp->rq_depth = rq_size;
+	qp->rq_level = 0;
+	qp->rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;
+
+	/* No RQ refill job is outstanding */
+	qp->rq_taskqpending = B_FALSE;
+}
+
+/*
+ * iser_ib_fini_qp
+ * Teardown the QP handle. The only resource released here is the handle's
+ * qp_lock mutex; the iser_qp_t itself is not freed by this routine.
+ */
+void
+iser_ib_fini_qp(iser_qp_t *qp)
+{
+ /* Destroy the handle lock */
+ mutex_destroy(&qp->qp_lock);
+}
+
+/*
+ * iser_ib_activate_port
+ * Bind the iSER CM service of the given IDM service to the port identified
+ * by gid.  On success the binding is recorded on the service's bind list,
+ * so that it can be undone at a later time, and IBT_SUCCESS is returned.
+ * On failure the status of ibt_bind_service() is returned and nothing is
+ * recorded.
+ */
+static int
+iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
+{
+	iser_svc_t	*svc = idm_svc->is_iser_svc;
+	iser_sbind_t	*bind;
+	int		rc;
+
+	/* The record carrying the bind handle, tagged with guid and gid */
+	bind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
+	bind->is_gid = gid;
+	bind->is_guid = guid;
+
+	rc = ibt_bind_service(svc->is_srvhdl, gid, NULL,
+	    idm_svc, &bind->is_sbindhdl);
+
+	if (rc == IBT_SUCCESS) {
+		/* Keep the record so the binding can be found again */
+		list_insert_tail(&svc->is_sbindlist, bind);
+		return (IBT_SUCCESS);
+	}
+
+	ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
+	    "Bind service(%llx) on port(%llx:%llx) failed",
+	    rc, (longlong_t)svc->is_svcid,
+	    (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
+
+	/* The bind failed, so the record is of no use */
+	kmem_free(bind, sizeof (iser_sbind_t));
+
+	return (rc);
+}
+
+/*
+ * iser_ib_deactivate_port
+ * Fail the transport of every iSER connection on the target connection
+ * list whose channel path is on the given HCA and, for the connections
+ * that are not initiator connections, unbind the service bound to the
+ * given HCA guid/gid pair if such a binding still exists.  The whole walk
+ * runs under idm.idm_global_mutex.
+ *
+ * NOTE(review): connections are matched by HCA guid only; gid is used only
+ * to locate the service binding.
+ */
+static void
+iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
+{
+	idm_conn_t	*conn;
+	iser_conn_t	*iconn;
+	iser_svc_t	*svc;
+	iser_sbind_t	*bind;
+
+	mutex_enter(&idm.idm_global_mutex);
+	for (conn = list_head(&idm.idm_tgt_conn_list);
+	    conn != NULL;
+	    conn = list_next(&idm.idm_tgt_conn_list, conn)) {
+
+		/* Only iSER connections are of interest */
+		if (conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER)
+			continue;
+
+		/* ... and of those only the ones on this HCA */
+		iconn = conn->ic_transport_private;
+		if (iconn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid)
+			continue;
+
+		/* The transport underneath this connection is gone */
+		idm_conn_event(conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
+
+		/* Initiator connections have no service binding to undo */
+		if (conn->ic_conn_type == CONN_TYPE_INI)
+			continue;
+
+		/* Look for a binding of this connection's service */
+		svc = conn->ic_svc_binding->is_iser_svc;
+		bind = iser_ib_get_bind(svc, hca_guid, gid);
+		if (bind == NULL)
+			continue;
+
+		/* The service is still bound here: unbind and forget it */
+		ibt_unbind_service(svc->is_srvhdl, bind->is_sbindhdl);
+		list_remove(&svc->is_sbindlist, bind);
+		kmem_free(bind, sizeof (iser_sbind_t));
+	}
+	mutex_exit(&idm.idm_global_mutex);
+}
+
+/*
+ * iser_ib_get_bind
+ * Search the bind list of the given iSER service for the record that
+ * matches both the HCA guid and the gid.  Returns the first matching
+ * record, or NULL if there is none.  No lock is taken here.
+ */
+static iser_sbind_t *
+iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
+{
+	iser_sbind_t	*cur;
+
+	cur = list_head(&iser_svc->is_sbindlist);
+	while (cur != NULL) {
+		/* HCA guid and both halves of the gid have to agree */
+		if ((cur->is_guid == hca_guid) &&
+		    (cur->is_gid.gid_prefix == gid.gid_prefix) &&
+		    (cur->is_gid.gid_guid == gid.gid_guid)) {
+			break;
+		}
+		cur = list_next(&iser_svc->is_sbindlist, cur);
+	}
+
+	/* cur is the match, or NULL when the walk ran off the end */
+	return (cur);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_idm.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,1181 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#include <sys/socket.h> /* networking stuff */
+#include <sys/sysmacros.h> /* offsetof */
+
+#include <sys/ib/clients/iser/iser.h>
+#include <sys/ib/clients/iser/iser_idm.h>
+
+/*
+ * iSER transport routines
+ *
+ * All transport functions except iser_tgt_svc_create() are called through
+ * the ops vector; iser_tgt_svc_create() is called from the async handler
+ * in addition to being called by the ULP.
+ */
+
+static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu);
+
+static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
+static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
+static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic);
+static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic);
+static void iser_notice_key_values(struct idm_conn_s *ic,
+ nvlist_t *negotiated_nvl);
+static idm_status_t iser_free_task_rsrcs(idm_task_t *idt);
+static kv_status_t iser_negotiate_key_values(idm_conn_t *ic,
+ nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
+static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value,
+ const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value,
+ uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
+ nvlist_t *negotiated_nvl);
+static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value,
+ const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl,
+ nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
+static kv_status_t iser_handle_digest(nvpair_t *choices,
+ const idm_kv_xlate_t *ikvx, nvlist_t *request_nvl, nvlist_t *response_nvl,
+ nvlist_t *negotiated_nvl);
+static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
+ nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
+static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl,
+ nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
+static boolean_t iser_conn_is_capable(idm_conn_req_t *ic,
+ idm_transport_caps_t *caps);
+static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen);
+static idm_status_t iser_buf_setup(idm_buf_t *idb);
+static void iser_buf_teardown(idm_buf_t *idb);
+static void iser_buf_free(idm_buf_t *idb);
+static void iser_tgt_svc_destroy(struct idm_svc_s *is);
+static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is);
+static void iser_tgt_svc_offline(struct idm_svc_s *is);
+static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic);
+static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr,
+ struct idm_conn_s *ic);
+static void iser_conn_destroy(struct idm_conn_s *ic);
+static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic);
+static void iser_conn_disconnect(struct idm_conn_s *ic);
+
+/*
+ * iSER IDM transport operations; NULL marks an op iSER does not provide
+ */
+idm_transport_ops_t iser_transport_ops = {
+ &iser_pdu_tx, /* it_tx_pdu */
+ &iser_buf_tx_to_ini, /* it_buf_tx_to_ini */
+ &iser_buf_rx_from_ini, /* it_buf_rx_from_ini */
+ NULL, /* it_rx_datain */
+ NULL, /* it_rx_rtt */
+ NULL, /* it_rx_dataout */
+ NULL, /* it_alloc_conn_rsrc */
+ NULL, /* it_free_conn_rsrc */
+ &iser_tgt_enable_datamover, /* it_tgt_enable_datamover */
+ &iser_ini_enable_datamover, /* it_ini_enable_datamover */
+ NULL, /* it_conn_terminate */
+ &iser_free_task_rsrcs, /* it_free_task_rsrc */
+ &iser_negotiate_key_values, /* it_negotiate_key_values */
+ &iser_notice_key_values, /* it_notice_key_values */
+ &iser_conn_is_capable, /* it_conn_is_capable */
+ &iser_buf_alloc, /* it_buf_alloc */
+ &iser_buf_free, /* it_buf_free */
+ &iser_buf_setup, /* it_buf_setup */
+ &iser_buf_teardown, /* it_buf_teardown */
+ &iser_tgt_svc_create, /* it_tgt_svc_create */
+ &iser_tgt_svc_destroy, /* it_tgt_svc_destroy */
+ &iser_tgt_svc_online, /* it_tgt_svc_online */
+ &iser_tgt_svc_offline, /* it_tgt_svc_offline */
+ &iser_conn_destroy, /* it_tgt_conn_destroy */
+ &iser_tgt_conn_connect, /* it_tgt_conn_connect */
+ &iser_conn_disconnect, /* it_tgt_conn_disconnect */
+ &iser_ini_conn_create, /* it_ini_conn_create */
+ &iser_conn_destroy, /* it_ini_conn_destroy */
+ &iser_ini_conn_connect, /* it_ini_conn_connect */
+ &iser_conn_disconnect /* it_ini_conn_disconnect */
+};
+
+/*
+ * iSER IDM transport capabilities: no capability flags are set
+ */
+idm_transport_caps_t iser_transport_caps = {
+ 0 /* flags */
+};
+
+int
+iser_idm_register()
+{
+	idm_transport_attr_t	xport_attr;
+
+	/* Describe the iSER transport: its type, ops vector and caps */
+	xport_attr.type = IDM_TRANSPORT_TYPE_ISER;
+	xport_attr.it_ops = &iser_transport_ops;
+	xport_attr.it_caps = &iser_transport_caps;
+
+	/* Hand the description to IDM and report the outcome */
+	if (idm_transport_register(&xport_attr) == IDM_STATUS_SUCCESS) {
+		ISER_LOG(CE_NOTE, "Registered iSER transport with IDM");
+		return (DDI_SUCCESS);
+	}
+
+	ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM");
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * iser_ini_conn_create()
+ * Build the iSER context for an initiator connection request: allocate
+ * the connection handle and its channel, then attach the handle to ic.
+ * Returns IDM_STATUS_FAIL if no channel can be allocated.
+ */
+static idm_status_t
+iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
+{
+	iser_conn_t	*conn;
+	iser_chan_t	*chan;
+
+	/* Connection handle first, together with its lock */
+	conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP);
+	mutex_init(&conn->ic_lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Channel to the target node; undo the handle if this fails */
+	chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr);
+	if (chan == NULL) {
+		ISER_LOG(CE_WARN, "iser: failed to allocate channel");
+		mutex_destroy(&conn->ic_lock);
+		kmem_free(conn, sizeof (iser_conn_t));
+		return (IDM_STATUS_FAIL);
+	}
+
+	/*
+	 * iser_channel_alloc() fills in the local and remote IP; only the
+	 * remote port still has to come from the idm_conn_req_t (the
+	 * local port is irrelevant).  The IDM sockaddr keeps the port in
+	 * network byte order while IBT expects host byte order, hence
+	 * the ntohs().  Other address families get ISCSI_LISTEN_PORT.
+	 */
+	if (cr->cr_ini_dst_addr.sin.sa_family == AF_INET) {
+		chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port);
+	} else if (cr->cr_ini_dst_addr.sin.sa_family == AF_INET6) {
+		chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port);
+	} else {
+		chan->ic_rport = ISCSI_LISTEN_PORT;
+	}
+	chan->ic_lport = 0;
+
+	/* Initial state: an allocated initiator-side connection */
+	cv_init(&conn->ic_stage_cv, NULL, CV_DEFAULT, NULL);
+	conn->ic_type = ISER_CONN_TYPE_INI;
+	conn->ic_stage = ISER_CONN_STAGE_ALLOCATED;
+
+	/* Tie the handle to its channel and to the IDM connection */
+	conn->ic_chan = chan;
+	conn->ic_idmc = ic;
+
+	/* Back pointer for easy access during CM event handling */
+	chan->ic_conn = conn;
+
+	/*
+	 * Publish the iSER handle through the IDM connection's transport
+	 * private pointer, and record the iSER header length on it.
+	 */
+	ic->ic_transport_private = (void *)conn;
+	ic->ic_transport_hdrlen = ISER_HEADER_LENGTH;
+
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_internal_conn_destroy()
+ * Release the iSER-private state of a connection.  Called from
+ * iser_conn_destroy() and, for partially set up connections, from CM code.
+ */
+void
+iser_internal_conn_destroy(iser_conn_t *ic)
+{
+	mutex_enter(&ic->ic_lock);
+	iser_channel_free(ic->ic_chan);
+	/*
+	 * A target connection still in the ALLOCATED stage has yet to be
+	 * established; free our reference on the target service handle.
+	 */
+	if (ic->ic_type == ISER_CONN_TYPE_TGT) {
+		if (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED) {
+			iser_tgt_svc_rele(ic->ic_idms->is_iser_svc);
+		}
+	}
+
+	cv_destroy(&ic->ic_stage_cv);
+	mutex_exit(&ic->ic_lock);
+	mutex_destroy(&ic->ic_lock);
+	kmem_free(ic, sizeof (iser_conn_t));
+}
+
+/*
+ * iser_conn_destroy()
+ * Destroy the iSER state behind an initiator or target IDM connection.
+ */
+static void
+iser_conn_destroy(idm_conn_t *ic)
+{
+	iser_conn_t	*doomed = (iser_conn_t *)ic->ic_transport_private;
+
+	/* Clear the IDM handle's pointer once the teardown has run */
+	iser_internal_conn_destroy(doomed);
+	ic->ic_transport_private = NULL;
+}
+
+/*
+ * iser_ini_conn_connect()
+ * Open the channel that iser_ini_conn_create() allocated for this
+ * connection and mark the connection as connected.
+ */
+static idm_status_t
+iser_ini_conn_connect(idm_conn_t *ic)
+{
+	iser_conn_t	*conn = (iser_conn_t *)ic->ic_transport_private;
+	iser_chan_t	*chan = conn->ic_chan;
+
+	if (iser_channel_open(chan) != ISER_STATUS_SUCCESS) {
+		ISER_LOG(CE_WARN, "iser: failed to open channel");
+		return (IDM_STATUS_FAIL);
+	}
+
+	/*
+	 * Fill in the IDM connection's local and remote addresses.
+	 */
+	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr, &chan->ic_localip,
+	    chan->ic_lport);
+	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr, &chan->ic_remoteip,
+	    chan->ic_rport);
+
+	/*
+	 * Take a hold on the IDM connection handle and advance the stage,
+	 * both under the connection lock.
+	 */
+	mutex_enter(&conn->ic_lock);
+	idm_conn_hold(ic);
+	conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED;
+	mutex_exit(&conn->ic_lock);
+
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_conn_disconnect()
+ * Close the channel behind this connection, stepping the connection
+ * stage through CLOSING to CLOSED.
+ */
+static void
+iser_conn_disconnect(idm_conn_t *ic)
+{
+	iser_conn_t	*conn = (iser_conn_t *)ic->ic_transport_private;
+
+	/* Mark the connection as closing before the close is started */
+	mutex_enter(&conn->ic_lock);
+	conn->ic_stage = ISER_CONN_STAGE_CLOSING;
+	mutex_exit(&conn->ic_lock);
+
+	/* Close the channel */
+	iser_channel_close(conn->ic_chan);
+
+	/* Drop our hold on the IDM conn handle and mark the stage CLOSED */
+	mutex_enter(&conn->ic_lock);
+	idm_conn_rele(conn->ic_idmc);
+	conn->ic_stage = ISER_CONN_STAGE_CLOSED;
+	mutex_exit(&conn->ic_lock);
+}
+
+/*
+ * iser_tgt_svc_create()
+ * Establish the CM service for inbound iSER requests on sr->sr_port.
+ * On failure is->is_iser_svc is reset to NULL and IDM_STATUS_FAIL returned.
+ */
+idm_status_t
+iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
+{
+	iser_svc_t	*iser_svc;
+	int		rc;
+
+	iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP);
+	is->is_iser_svc = (void *)iser_svc;
+
+	idm_refcnt_init(&iser_svc->is_refcnt, iser_svc);
+
+	list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t),
+	    offsetof(iser_sbind_t, is_list_node));
+	iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port);
+
+	/*
+	 * Register an iSER target service for the requested port.  The
+	 * idm_svc handle already carries the iser_svc structure.
+	 */
+	rc = iser_register_service(is);
+	if (rc != DDI_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service "
+		    "failed on port (%d): rc (0x%x)", sr->sr_port, rc);
+		ibt_release_ip_sid(iser_svc->is_svcid);
+		list_destroy(&iser_svc->is_sbindlist);
+		idm_refcnt_destroy(&iser_svc->is_refcnt);
+		/* Don't leave a pointer to freed memory in the handle */
+		is->is_iser_svc = NULL;
+		kmem_free(iser_svc, sizeof (iser_svc_t));
+		return (IDM_STATUS_FAIL);
+	}
+
+	return (IDM_STATUS_SUCCESS);
+}
+
+/* Hold and release wrappers around the iSER service handle's IDM refcnt */
+void
+iser_tgt_svc_hold(iser_svc_t *is)
+{
+ idm_refcnt_hold(&is->is_refcnt);
+}
+
+void
+iser_tgt_svc_rele(iser_svc_t *is)
+{
+ idm_refcnt_rele(&is->is_refcnt);
+}
+
+/*
+ * iser_tgt_svc_destroy()
+ * Undo iser_tgt_svc_create(): deregister the service, wait for all
+ * references on it to drain, then free it.
+ */
+static void
+iser_tgt_svc_destroy(idm_svc_t *is)
+{
+	/* Latch the iSER service pointer before deregistering */
+	iser_svc_t	*svc = (iser_svc_t *)is->is_iser_svc;
+
+	/* Deregister the iSER target service on this port */
+	iser_deregister_service(is);
+
+	/*
+	 * Nothing is torn down while references remain: block until
+	 * the iSER service handle's refcnt reaches zero.
+	 */
+	idm_refcnt_wait_ref(&svc->is_refcnt);
+
+	/* Release the binding list and the refcnt, then the handle itself */
+	list_destroy(&svc->is_sbindlist);
+	idm_refcnt_destroy(&svc->is_refcnt);
+
+	kmem_free(svc, sizeof (iser_svc_t));
+}
+
+/*
+ * iser_tgt_svc_online()
+ * Bind the CM service that iser_tgt_svc_create() allocated.  The bind
+ * runs with is->is_mutex held.
+ */
+static idm_status_t
+iser_tgt_svc_online(idm_svc_t *is)
+{
+	idm_status_t	result = IDM_STATUS_SUCCESS;
+
+	mutex_enter(&is->is_mutex);
+
+	/*
+	 * The IDM service handle is handed to the bind so that it can
+	 * serve as the client private data later on.
+	 */
+	if (iser_bind_service(is) != ISER_STATUS_SUCCESS) {
+		ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service");
+		result = IDM_STATUS_FAIL;
+	}
+
+	mutex_exit(&is->is_mutex);
+
+	return (result);
+}
+
+/*
+ * iser_tgt_svc_offline
+ * Unbind the service on all available HCA ports, with is->is_mutex held.
+ */
+static void
+iser_tgt_svc_offline(idm_svc_t *is)
+{
+ mutex_enter(&is->is_mutex);
+
+ iser_unbind_service(is);
+ mutex_exit(&is->is_mutex);
+
+}
+
+/*
+ * iser_tgt_conn_connect()
+ * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
+ * the SM invokes for an inbound connection request.  Always succeeds.
+ */
+/* ARGSUSED */
+static idm_status_t
+iser_tgt_conn_connect(idm_conn_t *ic)
+{
+ /* No action required */
+ return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_tgt_enable_datamover() moves the conn stage of the iSER connection
+ * behind this idm_conn_t to indicate logged in.
+ */
+static idm_status_t
+iser_tgt_enable_datamover(idm_conn_t *ic)
+{
+	iser_conn_t	*conn = (iser_conn_t *)ic->ic_transport_private;
+
+	/* Update the stage under the connection lock */
+	mutex_enter(&conn->ic_lock);
+	conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
+	mutex_exit(&conn->ic_lock);
+
+	/* There is nothing here that can fail */
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_ini_enable_datamover() is used by the iSCSI initiator to request
+ * that a specified iSCSI connection be transitioned to iSER-assisted mode.
+ * By this time the RDMA resources for a reliable connection have already
+ * been allocated and 'RDMAExtensions' is set to 'Yes', so no further
+ * negotiation is needed.  What remains is the iSER handshake: the
+ * initiator sends the first iSER Message, 'Hello', and waits for the
+ * target's 'HelloReply' before the connection may enter the Full Feature
+ * Phase.
+ *
+ * No transport op is required on the target side.
+ */
+static idm_status_t
+iser_ini_enable_datamover(idm_conn_t *ic)
+{
+	iser_conn_t	*conn;
+	clock_t		deadline;
+	idm_status_t	result;
+
+	conn = (iser_conn_t *)ic->ic_transport_private;
+
+	/* Record that the Hello is about to be sent */
+	mutex_enter(&conn->ic_lock);
+	conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT;
+	mutex_exit(&conn->ic_lock);
+
+	/* Send the iSER Hello Message to the target */
+	if (iser_xfer_hello_msg(conn->ic_chan) != ISER_STATUS_SUCCESS) {
+		mutex_enter(&conn->ic_lock);
+		conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL;
+		mutex_exit(&conn->ic_lock);
+
+		return (IDM_STATUS_FAIL);
+	}
+
+	/*
+	 * Wait, holding conn->ic_lock, for the target's iSER HelloReply
+	 * Message, i.e. for the stage to become
+	 * ISER_CONN_STAGE_HELLOREPLY_RCV.  The handshake is given half a
+	 * second (500000 usec) from now; cv_timedwait() is retried until
+	 * either the stage changes or that deadline has passed.
+	 */
+	deadline = ddi_get_lbolt() + drv_usectohz(500000);
+
+	mutex_enter(&conn->ic_lock);
+	while (conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) {
+		if (ddi_get_lbolt() >= deadline) {
+			break;
+		}
+		(void) cv_timedwait(&conn->ic_stage_cv, &conn->ic_lock,
+		    deadline);
+	}
+
+	if (conn->ic_stage == ISER_CONN_STAGE_HELLOREPLY_RCV) {
+		/*
+		 * Handshake complete: the initiator connection can go
+		 * to the next phase - FFP.
+		 */
+		conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
+		result = IDM_STATUS_SUCCESS;
+	} else {
+		/*
+		 * The stage is anything other than HELLOREPLY_RCV: fail.
+		 */
+		conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL;
+		result = IDM_STATUS_FAIL;
+	}
+
+	mutex_exit(&conn->ic_lock);
+
+	return (result);
+}
+
+/*
+ * iser_free_task_rsrcs()
+ * This routine does not currently need to do anything. It is used in
+ * the sockets transport to explicitly complete any buffers on the task,
+ * but we can rely on our RCaP layer to finish up its work without any
+ * intervention.
+ */
+/* ARGSUSED */
+static idm_status_t
+iser_free_task_rsrcs(idm_task_t *idt)
+{
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_negotiate_key_values() checks the login key values proposed on ic
+ */
+/* ARGSUSED */
+static kv_status_t
+iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
+    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
+{
+	kv_status_t	outcome;
+
+	/* Run every key=value pair of the request through the handlers */
+	outcome = iser_process_request_nvlist(request_nvl, response_nvl,
+	    negotiated_nvl);
+
+	/* We are on the iSER transport, so flag RDMA use on the ic handle */
+	ic->ic_rdma_extensions = B_TRUE;
+
+	return (outcome);
+}
+
+/* Run each key=value pair of a login request through iser_handle_key() */
+static kv_status_t
+iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl,
+    nvlist_t *negotiated_nvl)
+{
+	const idm_kv_xlate_t	*xlate;
+	kv_status_t		status = KV_HANDLED;
+	boolean_t		countered = B_FALSE;
+	nvpair_t		*pair;
+	nvpair_t		*following;
+	char			*key;
+
+	for (pair = nvlist_next_nvpair(request_nvl, NULL); pair != NULL;
+	    pair = following) {
+		/*
+		 * Fetch the successor first: the handlers may remove the
+		 * current pair from the request list.
+		 */
+		following = nvlist_next_nvpair(request_nvl, pair);
+
+		key = nvpair_name(pair);
+		xlate = idm_lookup_kv_xlate(key, strlen(key));
+		status = iser_handle_key(pair, xlate, request_nvl,
+		    response_nvl, negotiated_nvl);
+
+		if (status == KV_HANDLED_NO_TRANSIT) {
+			/* we countered, so the transit flag must go */
+			countered = B_TRUE;
+		} else if (status != KV_HANDLED) {
+			/* error, stop processing and report it */
+			break;
+		}
+	}
+
+	/*
+	 * Leaving the loop with KV_HANDLED means the whole list was
+	 * processed.  If any key along the way was countered, the
+	 * overall answer has to be KV_HANDLED_NO_TRANSIT instead.
+	 */
+	if ((status == KV_HANDLED) && countered) {
+		status = KV_HANDLED_NO_TRANSIT;
+	}
+
+	return (status);
+}
+
+/*
+ * Dispatch one key=value pair to the digest (list), boolean or numerical
+ * handler, according to its key id.
+ * NOTE(review): ikvx is dereferenced unconditionally, so this assumes that
+ * idm_lookup_kv_xlate() never hands back NULL for nvp's name - TODO confirm.
+ */
+static kv_status_t
+iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
+    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
+{
+	kv_status_t	result;
+	boolean_t	flag;
+	uint64_t	num;
+	int		err;
+
+	/* Values of booleans and numericals are fetched just before use */
+	switch (ikvx->ik_key_id) {
+	case KI_HEADER_DIGEST:
+	case KI_DATA_DIGEST:
+		/* Ensure "None" */
+		result = iser_handle_digest(nvp, ikvx, request_nvl,
+		    response_nvl, negotiated_nvl);
+		break;
+
+	case KI_RDMA_EXTENSIONS:
+		/* Ensure "Yes" */
+		err = nvpair_value_boolean_value(nvp, &flag);
+		ASSERT(err == 0);
+		result = iser_handle_boolean(nvp, flag, ikvx, B_TRUE,
+		    request_nvl, response_nvl, negotiated_nvl);
+		break;
+
+	case KI_IMMEDIATE_DATA:
+	case KI_OFMARKER:
+	case KI_IFMARKER:
+		/* Ensure "No" */
+		err = nvpair_value_boolean_value(nvp, &flag);
+		ASSERT(err == 0);
+		result = iser_handle_boolean(nvp, flag, ikvx, B_FALSE,
+		    request_nvl, response_nvl, negotiated_nvl);
+		break;
+
+	case KI_TARGET_RECV_DATA_SEGMENT_LENGTH:
+		/* Validate the proposed value */
+		err = nvpair_value_uint64(nvp, &num);
+		ASSERT(err == 0);
+		result = iser_handle_numerical(nvp, num, ikvx,
+		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN,
+		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX,
+		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
+		    request_nvl, response_nvl, negotiated_nvl);
+		break;
+
+	case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH:
+		/* Validate the proposed value */
+		err = nvpair_value_uint64(nvp, &num);
+		ASSERT(err == 0);
+		result = iser_handle_numerical(nvp, num, ikvx,
+		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN,
+		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX,
+		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
+		    request_nvl, response_nvl, negotiated_nvl);
+		break;
+
+	case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS:
+		/* Validate the proposed value */
+		err = nvpair_value_uint64(nvp, &num);
+		ASSERT(err == 0);
+		result = iser_handle_numerical(nvp, num, ikvx,
+		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN,
+		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX,
+		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX,
+		    request_nvl, response_nvl, negotiated_nvl);
+		break;
+
+	default:
+		/*
+		 * All other keys, including invalid keys, will be
+		 * handled at the client layer.
+		 */
+		result = KV_HANDLED;
+		break;
+	}
+
+	return (result);
+}
+
+/* Ensure that "None" is an option in the digest list, and select it */
+static kv_status_t
+iser_handle_digest(nvpair_t *choices, const idm_kv_xlate_t *ikvx,
+    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
+{
+	kv_status_t	kvrc = KV_VALUE_ERROR;
+	int		nvrc = 0;
+	nvpair_t	*digest_choice;
+	char		*digest_choice_string;
+
+	/*
+	 * Loop through all digest choices.  We need to enforce "None"
+	 * for both header and data digest.  If we find our required
+	 * value, add the value to our negotiated values list and
+	 * respond with that value in the login response.  If not,
+	 * kvrc keeps its initial KV_VALUE_ERROR, which indicates a
+	 * value error for the iSCSI layer to work with.
+	 */
+	digest_choice = idm_get_next_listvalue(choices, NULL);
+	while (digest_choice != NULL) {
+		nvrc = nvpair_value_string(digest_choice,
+		    &digest_choice_string);
+		ASSERT(nvrc == 0);
+
+		if (strcasecmp(digest_choice_string, "none") == 0) {
+			/* Add to negotiated values list */
+			nvrc = nvlist_add_string(negotiated_nvl,
+			    ikvx->ik_key_name, digest_choice_string);
+			kvrc = idm_nvstat_to_kvstat(nvrc);
+			if (nvrc == 0) {
+				/* Add to login response list */
+				nvrc = nvlist_add_string(response_nvl,
+				    ikvx->ik_key_name, digest_choice_string);
+				kvrc = idm_nvstat_to_kvstat(nvrc);
+				/* Remove from the request (we've handled it) */
+				(void) nvlist_remove_all(request_nvl,
+				    ikvx->ik_key_name);
+			}
+			break;
+		}
+		digest_choice = idm_get_next_listvalue(choices,
+		    digest_choice);
+	}
+
+	/* No "None" on offer: report the value error rather than ASSERT */
+	return (kvrc);
+}
+
+/* Validate a proposed boolean value, and set the alternate if necessary */
+static kv_status_t
+iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx,
+    boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
+    nvlist_t *negotiated_nvl)
+{
+	/* Start from KV_HANDLED so the check at the end reads a set value */
+	kv_status_t	kvrc = KV_HANDLED;
+	int		nvrc;
+	boolean_t	respond;
+
+	if (value != iser_value) {
+		/*
+		 * Respond back to initiator with our value, and
+		 * set the return value to unset the transit bit.
+		 */
+		value = iser_value;
+		kvrc = KV_HANDLED_NO_TRANSIT;
+		nvrc = 0;
+		respond = B_TRUE;
+	} else {
+		/* Add this to our negotiated values */
+		nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
+		/* Respond if this is not a declarative */
+		respond = (ikvx->ik_declarative == B_FALSE);
+	}
+
+	/* Response of Simple-value Negotiation */
+	if (nvrc == 0 && respond) {
+		nvrc = nvlist_add_boolean_value(response_nvl,
+		    ikvx->ik_key_name, value);
+		/* Remove from the request (we've handled it) */
+		(void) nvlist_remove_all(request_nvl, ikvx->ik_key_name);
+	}
+
+	if (kvrc == KV_HANDLED_NO_TRANSIT) {
+		return (kvrc);
+	}
+
+	return (idm_nvstat_to_kvstat(nvrc));
+}
+
+/*
+ * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and
+ * maximum values, and set an alternate, if necessary. Note that the value
+ * 'iser_max_value' represents our implementation maximum (typically the max).
+ */
+static kv_status_t
+iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx,
+    uint64_t min_value, uint64_t max_value, uint64_t iser_max_value,
+    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
+{
+	kv_status_t	kvrc = KV_HANDLED;
+	int		nvrc = 0;
+	boolean_t	respond;
+
+	/* Validate against standard */
+	if ((value < min_value) || (value > max_value)) {
+		kvrc = KV_VALUE_ERROR;
+	} else {
+		if (value > iser_max_value) {
+			/*
+			 * Respond back to initiator with our value, and
+			 * set the return value to unset the transit bit.
+			 */
+			value = iser_max_value;
+			kvrc = KV_HANDLED_NO_TRANSIT;
+			respond = B_TRUE;
+		} else {
+			/* Add this to our negotiated values */
+			nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
+			/* Respond if this is not a declarative */
+			respond = (ikvx->ik_declarative == B_FALSE);
+		}
+
+		/* Response of Simple-value Negotiation */
+		if (nvrc == 0 && respond) {
+			nvrc = nvlist_add_uint64(response_nvl,
+			    ikvx->ik_key_name, value);
+			/* Remove from the request (we've handled it) */
+			(void) nvlist_remove_all(request_nvl,
+			    ikvx->ik_key_name);
+		}
+	}
+
+	/* Value errors and counter-proposals are reported as they stand */
+	if (kvrc != KV_HANDLED) {
+		return (kvrc);
+	}
+
+	return (idm_nvstat_to_kvstat(nvrc));
+}
+
+/*
+ * iser_notice_key_values() activates the negotiated key values for
+ * this connection.
+ */
+static void
+iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
+{
+	iser_conn_t	*conn = (iser_conn_t *)ic->ic_transport_private;
+	boolean_t	flag;
+	uint64_t	num;
+	int		nvrc;
+
+	/* Save each negotiated operational parameter that is present */
+	nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "HeaderDigest",
+	    &flag);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_header_digest = flag;
+	}
+
+	nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "DataDigest",
+	    &flag);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_data_digest = flag;
+	}
+
+	nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "RDMAExtensions",
+	    &flag);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_rdma_extensions = flag;
+	}
+
+	nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "OFMarker",
+	    &flag);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_ofmarker = flag;
+	}
+
+	nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "IFMarker",
+	    &flag);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_ifmarker = flag;
+	}
+
+	nvrc = nvlist_lookup_uint64(negotiated_nvl,
+	    "TargetRecvDataSegmentLength", &num);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_target_recv_data_segment_length = num;
+	}
+
+	nvrc = nvlist_lookup_uint64(negotiated_nvl,
+	    "InitiatorRecvDataSegmentLength", &num);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_initiator_recv_data_segment_length = num;
+	}
+
+	nvrc = nvlist_lookup_uint64(negotiated_nvl,
+	    "MaxOutstandingUnexpectedPDUs", &num);
+	if (nvrc != ENOENT) {
+		ASSERT(nvrc == 0);
+		conn->ic_op_params.op_max_outstanding_unexpected_pdus = num;
+	}
+
+	/* Test boolean values which are required by RFC 5046 */
+#ifdef ISER_DEBUG
+	ASSERT(conn->ic_op_params.op_rdma_extensions == B_TRUE);
+	ASSERT(conn->ic_op_params.op_header_digest == B_FALSE);
+	ASSERT(conn->ic_op_params.op_data_digest == B_FALSE);
+	ASSERT(conn->ic_op_params.op_ofmarker == B_FALSE);
+	ASSERT(conn->ic_op_params.op_ifmarker == B_FALSE);
+#endif
+}
+
+
+/*
+ * iser_conn_is_capable() verifies that the passed connection is provided
+ * for by an iSER-capable link.
+ * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check
+ * if the link is on IPoIB. This only indicates a chance that the link is
+ * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth
+ * gateway, or other IB but non-RCaP link. Rather than fully establishing the
+ * link to verify RCaP here, we instead will return B_TRUE
+ * indicating the link is iSER-capable, if the link is IPoIB. If then in
+ * iser_ini_conn_create() the link proves not to be RCaP, IDM will fall back
+ * to using the IDM Sockets transport.
+ */
+/* ARGSUSED */
+static boolean_t
+iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps)
+{
+ /* A NULL value for laddr indicates implicit source */
+ return (iser_path_exists(NULL, &cr->cr_ini_dst_addr));
+}
+
+/*
+ * iser_pdu_tx() transmits a Control PDU via the iSER channel.  The channel
+ * is taken from the iSER connection behind the idm_conn_t passed in, and is
+ * handed to the iser_xfer routine together with the pdu.
+ */
+static void
+iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu)
+{
+	iser_conn_t	*conn;
+	iser_status_t	posted;
+
+	conn = (iser_conn_t *)ic->ic_transport_private;
+
+	posted = iser_xfer_ctrlpdu(conn->ic_chan, pdu);
+	if (posted == ISER_STATUS_SUCCESS) {
+		/*
+		 * The PDU was posted for transmission.  The completion
+		 * handler will invoke idm_pdu_complete() with the
+		 * completion status; see iser_cq.c for more information.
+		 */
+		return;
+	}
+
+	/* Posting failed, so fail this PDU transmission ourselves */
+	ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: "
+	    "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu);
+	idm_pdu_complete(pdu, IDM_STATUS_FAIL);
+}
+
+/*
+ * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the
+ * initiator to fulfill SCSI Read commands, using an iser_xfer routine to
+ * implement the RDMA operations.
+ *
+ * The caller holds idt->idt_mutex on entry; it is dropped before returning.
+ */
+static idm_status_t
+iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
+{
+	idm_status_t	result;
+
+	ASSERT(mutex_owned(&idt->idt_mutex));
+
+	if (iser_xfer_buf_to_ini(idt, idb) == ISER_STATUS_SUCCESS) {
+		/*
+		 * iSCSIt's Data Completion Notify callback is invoked
+		 * from the Work Request Send completion Handler
+		 */
+		result = IDM_STATUS_SUCCESS;
+	} else {
+		ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed "
+		    "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)",
+		    (void *) idt, (void *) idb);
+		result = IDM_STATUS_FAIL;
+	}
+
+	/* Release the task mutex the caller entered with */
+	mutex_exit(&idt->idt_mutex);
+
+	return (result);
+}
+
+/*
+ * iser_buf_rx_from_ini() receives data from the initiator into the buffer
+ * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked
+ * to implement the RDMA operations.
+ *
+ * Caller holds idt->idt_mutex; it is released here before returning.
+ */
+static idm_status_t
+iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
+{
+	iser_status_t	iser_status;
+	idm_status_t	idm_status = IDM_STATUS_SUCCESS;
+
+	ASSERT(mutex_owned(&idt->idt_mutex));
+
+	iser_status = iser_xfer_buf_from_ini(idt, idb);
+
+	if (iser_status != ISER_STATUS_SUCCESS) {
+		ISER_LOG(CE_WARN, "iser_buf_rx_from_ini: failed "
+		    "iser_xfer_buf_from_ini: idt (0x%p) idb (0x%p)",
+		    (void *) idt, (void *) idb);
+		idm_status = IDM_STATUS_FAIL;
+	}
+
+	/*
+	 * iSCSIt's Data Completion Notify callback is invoked from
+	 * the Work Request Send completion Handler
+	 */
+
+	mutex_exit(&idt->idt_mutex);
+	return (idm_status);
+}
+
+/*
+ * iser_buf_alloc() hands out a buffer that is registered with the IBTF for
+ * use with iSER.  Each HCA has its own kmem cache providing a pool of
+ * registered buffers which, once initially allocated, remain registered
+ * with the HCA.  This routine is invoked only on the target, where we
+ * have the requirement to pre-allocate buffers for the upper layers.
+ * Note: a buflen larger than ISER_DEFAULT_BUFLEN cannot be satisfied and
+ * fails the allocation.
+ */
+/* ARGSUSED */
+static idm_status_t
+iser_buf_alloc(idm_buf_t *idb, uint64_t buflen)
+{
+	iser_conn_t	*conn;
+	iser_buf_t	*cached;
+
+	if (buflen > ISER_DEFAULT_BUFLEN) {
+		return (IDM_STATUS_FAIL);
+	}
+
+	/*
+	 * Take a buffer from the cache of the HCA this connection's
+	 * channel is on, without sleeping.  Buffers in that cache stay
+	 * allocated and registered (see above).
+	 */
+	conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
+	cached = kmem_cache_alloc(conn->ic_chan->ic_hca->iser_buf_cache,
+	    KM_NOSLEEP);
+	if (cached == NULL) {
+		ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed");
+		return (IDM_STATUS_FAIL);
+	}
+
+	/*
+	 * Wire the IDM buf handle up to the result: the data pointer,
+	 * then the private buf and reg handles.
+	 */
+	idb->idb_buf = cached->buf;
+	idb->idb_buf_private = (void *)cached;
+	idb->idb_reg_private = (void *)cached->iser_mr;
+
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_buf_free() returns the buffer handle passed in to its cache.  Note
+ * that the cached kmem object has an HCA-registered buffer in it which will
+ * not be freed.  This allows us to build up a cache of pre-allocated and
+ * registered buffers for use on the target.
+ */
+static void
+iser_buf_free(idm_buf_t *buf)
+{
+	iser_buf_t	*cached = buf->idb_buf_private;
+
+	/* Free back to the cache recorded in the iser_buf_t itself */
+	kmem_cache_free(cached->cache, cached);
+}
+
+/*
+ * iser_buf_setup() is invoked on the initiator in order to register memory
+ * on demand for use with the iSER layer.
+ *
+ * Buffers shorter than ISER_BCOPY_THRESHOLD are backed by a pre-registered
+ * buffer from the HCA's cache when one is available: idb_bufalloc and
+ * idb_bufbcopy are set, the caller's buffer is kept in idb_bufptr, and
+ * IDM_STATUS_SUCCESS is returned.
+ *
+ * Everything else is registered dynamically with idb_bufalloc cleared,
+ * and the status of that registration is returned.
+ */
+static idm_status_t
+iser_buf_setup(idm_buf_t *idb)
+{
+	iser_conn_t	*iser_conn;
+	iser_chan_t	*iser_chan;
+	iser_buf_t	*iser_buf = NULL;
+	int		status;
+
+	ASSERT(idb->idb_buf != NULL);
+
+	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
+	ASSERT(iser_conn != NULL);
+
+	/* Check the channel before anything is read through it */
+	iser_chan = iser_conn->ic_chan;
+	ASSERT(iser_chan != NULL);
+
+	/*
+	 * Memory registration is known to be slow, so for small
+	 * transfers, use pre-registered memory buffers and just
+	 * copy the data into/from them at the appropriate time
+	 */
+	if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) {
+		iser_buf = kmem_cache_alloc(iser_chan->ic_hca->iser_buf_cache,
+		    KM_NOSLEEP);
+	}
+
+	if (iser_buf == NULL) {
+		/*
+		 * Either the transfer is large or no cached buffer could
+		 * be had: dynamically register the memory passed in on
+		 * the idb, and ensure the bufalloc'd flag is unset.
+		 */
+		status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);
+		idb->idb_bufalloc = B_FALSE;
+		return (status);
+	}
+
+	/*
+	 * Remember the caller's buffer in idb_bufptr and point the IDM buf
+	 * handle at the cached one instead.  Data is to be copied from/to
+	 * this buffer using bcopy.
+	 */
+	idb->idb_bufptr = idb->idb_buf;
+	idb->idb_bufbcopy = B_TRUE;
+
+	idb->idb_buf = iser_buf->buf;
+
+	/* Set the private buf and reg handles in the IDM buf handle */
+	idb->idb_buf_private = (void *)iser_buf;
+	idb->idb_reg_private = (void *)iser_buf->iser_mr;
+
+	/* Ensure bufalloc'd flag is set */
+	idb->idb_bufalloc = B_TRUE;
+
+	return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iser_buf_teardown() is invoked on the initiator in order to deregister
+ * the memory of an idb from the iSER layer.
+ */
+static void
+iser_buf_teardown(idm_buf_t *idb)
+{
+	iser_conn_t	*conn;
+
+	conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
+
+	/* Deregister the idb's memory on the HCA of this conn's channel */
+	iser_dereg_rdma_mem(conn->ic_chan->ic_hca, idb);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_resource.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,605 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+
+#include <sys/ib/clients/iser/iser.h>
+
+/*
+ * iser_resource.c
+ * Routines for allocating resources for iSER
+ */
+
+static iser_mr_t *iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
+ ibt_mr_flags_t mr_flags);
+
+static void iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr);
+
+static iser_mr_t *iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr,
+ ib_memlen_t len, ibt_mr_flags_t mr_flags);
+
+static void iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr);
+
+static int iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2);
+
+/*
+ * iser_init_hca_caches()
+ * Invoked per HCA instance initialization, to establish HCA-wide
+ * message and buffer kmem caches. Note we'll uniquify cache names
+ * with the lower 32-bits of the HCA GUID.
+ */
+void
+iser_init_hca_caches(iser_hca_t *hca)
+{
+ char name[ISER_CACHE_NAMELEN];
+
+ (void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_pool_%08x",
+ (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
+ hca->hca_msg_pool = iser_vmem_create(name, hca, ISER_MSG_MR_CHUNKSIZE,
+ ISER_MSG_POOL_MAX, ISER_MSG_MR_FLAGS);
+ (void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_cache_%08x",
+ (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
+ hca->iser_msg_cache = kmem_cache_create(name, sizeof (iser_msg_t),
+ 0, &iser_msg_cache_constructor, &iser_msg_cache_destructor,
+ NULL, hca, NULL, KM_SLEEP);
+
+ (void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_pool_%08x",
+ (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
+ hca->hca_buf_pool = iser_vmem_create(name, hca, ISER_BUF_MR_CHUNKSIZE,
+ ISER_BUF_POOL_MAX, ISER_BUF_MR_FLAGS);
+ (void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_cache_%08x",
+ (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
+ hca->iser_buf_cache = kmem_cache_create(name, sizeof (iser_buf_t),
+ 0, &iser_buf_cache_constructor, &iser_buf_cache_destructor,
+ NULL, hca, NULL, KM_SLEEP);
+}
+
+/*
+ * iser_fini_hca_caches()
+ * Invoked per HCA instance teardown, this routine cleans up the
+ * message and buffer handle caches.
+ */
+void
+iser_fini_hca_caches(iser_hca_t *hca)
+{
+ kmem_cache_destroy(hca->iser_buf_cache);
+ iser_vmem_destroy(hca->hca_buf_pool);
+ kmem_cache_destroy(hca->iser_msg_cache);
+ iser_vmem_destroy(hca->hca_msg_pool);
+}
+
+/*
+ * Get a reset iSER WR handle from the global cache (no sleep; may be NULL)
+ */
+iser_wr_t *
+iser_wr_get()
+{
+ iser_wr_t *iser_wr;
+
+ iser_wr = kmem_cache_alloc(iser_state->iser_wr_cache, KM_NOSLEEP);
+ if (iser_wr != NULL) {
+ iser_wr->iw_type = ISER_WR_UNDEFINED;
+ iser_wr->iw_msg = NULL;
+ iser_wr->iw_buf = NULL;
+ iser_wr->iw_pdu = NULL;
+ }
+
+ return (iser_wr);
+}
+
+/*
+ * Free an iSER WR handle back to the global cache
+ */
+void
+iser_wr_free(iser_wr_t *iser_wr)
+{
+ kmem_cache_free(iser_state->iser_wr_cache, iser_wr);
+}
+
+/*
+ * iser_msg_cache_constructor()
+ * Allocate and register memory for an iSER Control-type PDU message.
+ * The cached objects will retain this memory registration in the HCA,
+ * and thus provide a cache of pre-allocated and registered messages
+ * for use in iSER.
+ */
+/* ARGSUSED */
+int
+iser_msg_cache_constructor(void *msg_void, void *arg, int flags)
+{
+ void *memp = NULL;
+ int status;
+ iser_msg_t *msg = (iser_msg_t *)msg_void;
+ iser_hca_t *hca = (iser_hca_t *)arg;
+ iser_mr_t mr;
+
+ memp = iser_vmem_alloc(hca->hca_msg_pool, ISER_MAX_CTRLPDU_LEN);
+ if (memp == NULL) {
+ ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
+ "failed to allocate backing memory");
+ return (DDI_FAILURE);
+ }
+
+ /* Fill in iser_mr for the memory we just allocated */
+ status = iser_vmem_mr(hca->hca_msg_pool, memp,
+ ISER_MAX_CTRLPDU_LEN, &mr);
+ if (status != IDM_STATUS_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
+ "couldn't find mr for %p", memp);
+ iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
+ return (DDI_FAILURE);
+ }
+
+ msg->msg_ds.ds_va = (ib_vaddr_t)(uintptr_t)memp;
+ msg->msg_ds.ds_key = mr.is_mrlkey;
+
+ /* Set a backpointer to this cache to save a lookup on free */
+ msg->cache = hca->iser_msg_cache;
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Return an iser_msg_t's backing memory to the HCA msg pool (no deregister).
+ */
+void
+iser_msg_cache_destructor(void *mr, void *arg)
+{
+ iser_msg_t *msg = (iser_msg_t *)mr;
+ iser_hca_t *hca = (iser_hca_t *)arg;
+ uint8_t *memp;
+
+ memp = (uint8_t *)(uintptr_t)(ib_vaddr_t)msg->msg_ds.ds_va;
+ iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
+}
+
+/*
+ * Pull 'num' msg handles off of hca's msg cache without sleeping, linking
+ * them through nextp (most recently allocated first). The cache constructor
+ * supplies each handle's registered buffer, which stays attached when the
+ * handle is freed. If an allocation fails, the handles obtained so far are
+ * returned; '*ret' (when non-NULL) gets the count actually allocated.
+ */
+iser_msg_t *
+iser_msg_get(iser_hca_t *hca, int num, int *ret)
+{
+ iser_msg_t *tmp, *msg = NULL;
+ int i;
+
+ ASSERT(hca != NULL);
+
+ /*
+ * Pull num number of message handles off the cache, linking
+ * them if more than one have been requested.
+ */
+ for (i = 0; i < num; i++) {
+ tmp = kmem_cache_alloc(hca->iser_msg_cache, KM_NOSLEEP);
+ if (tmp == NULL) {
+ ISER_LOG(CE_NOTE, "iser_msg_get: alloc failed, "
+ "requested (%d) allocated (%d)", num, i);
+ break;
+ }
+ tmp->msg_ds.ds_len = ISER_MAX_CTRLPDU_LEN;
+ tmp->nextp = msg;
+ msg = tmp;
+ }
+
+ if (ret != NULL) {
+ *ret = i;
+ }
+
+ return (msg);
+}
+
+/*
+ * Free this msg back to its cache, leaving the memory contained by
+ * it registered for later re-use.
+ */
+void
+iser_msg_free(iser_msg_t *msg)
+{
+ kmem_cache_free(msg->cache, msg);
+}
+
+/*
+ * iser_buf_cache_constructor()
+ * Attach pre-registered memory from the HCA buf pool to an iser_buf_t, along
+ * with an iser_mr handle describing it. Cached objects keep both, giving
+ * iSER a cache of ready-to-use RDMA data buffers.
+ */
+/* ARGSUSED */
+int
+iser_buf_cache_constructor(void *mr, void *arg, int flags)
+{
+ uint8_t *memp;
+ idm_status_t status;
+ iser_buf_t *iser_buf = (iser_buf_t *)mr;
+ iser_hca_t *hca = (iser_hca_t *)arg;
+
+ /* Allocate an iser_mr handle for this buffer */
+ iser_buf->iser_mr = kmem_zalloc(sizeof (iser_mr_t), KM_NOSLEEP);
+ if (iser_buf->iser_mr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_buf_cache_constructor: "
+ "failed to allocate memory for iser_mr handle");
+ return (DDI_FAILURE);
+ }
+
+ memp = iser_vmem_alloc(hca->hca_buf_pool, ISER_DEFAULT_BUFLEN);
+ if (memp == NULL) {
+ kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
+ return (DDI_FAILURE);
+ }
+
+ /* Fill in iser_mr for the memory we just allocated */
+ status = iser_vmem_mr(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN,
+ iser_buf->iser_mr);
+ if (status != IDM_STATUS_SUCCESS) {
+ /* Undo both allocations before failing */
+ iser_vmem_free(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN);
+ kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
+ return (DDI_FAILURE);
+ }
+
+ /* Set buf pointer and len for later manipulation (if necessary) */
+ iser_buf->buf = (uint64_t *)(uintptr_t)memp;
+ iser_buf->buflen = ISER_DEFAULT_BUFLEN;
+
+ /* Populate the SGE Vaddr and L_key for the xfer operation later */
+ iser_buf->buf_ds.ds_va = iser_buf->iser_mr->is_mrva;
+ iser_buf->buf_ds.ds_key = iser_buf->iser_mr->is_mrlkey;
+
+ /* Set a backpointer to this cache to save a lookup on free */
+ iser_buf->cache = hca->iser_buf_cache;
+ gethrestime(&iser_buf->buf_constructed);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Return an iser_buf_t's memory to the HCA buf pool and free its iser_mr.
+ */
+void
+iser_buf_cache_destructor(void *mr, void *arg)
+{
+ iser_buf_t *iser_buf = (iser_buf_t *)mr;
+ iser_hca_t *hca = (iser_hca_t *)arg;
+
+ gethrestime(&iser_buf->buf_destructed);
+
+ iser_vmem_free(hca->hca_buf_pool, iser_buf->buf, iser_buf->buflen);
+
+ kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
+}
+
+/*
+ * Register an initiator idb's buffer; the iser_mr goes in idb_reg_private
+ */
+int
+iser_reg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
+{
+ iser_mr_t *iser_mr = NULL;
+
+ ASSERT(idb != NULL);
+ ASSERT(idb->idb_buflen > 0);
+
+ iser_mr = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)idb->idb_buf,
+ idb->idb_buflen, ISER_BUF_MR_FLAGS | IBT_MR_NOSLEEP);
+ if (iser_mr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_reg_rdma_mem: failed to register "
+ "memory for idm_buf_t");
+ return (DDI_FAILURE);
+ }
+
+ idb->idb_reg_private = (void *)iser_mr;
+
+ return (DDI_SUCCESS);
+}
+
+void
+iser_dereg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
+{
+ iser_mr_t *mr;
+
+ ASSERT(idb != NULL);
+ mr = (iser_mr_t *)idb->idb_reg_private;
+
+ iser_dereg_mem(hca, mr);
+}
+
+iser_vmem_mr_pool_t *
+iser_vmem_create(const char *name, iser_hca_t *hca, ib_memlen_t chunksize,
+ uint64_t max_total_size, ibt_mr_flags_t arena_mr_flags)
+{
+ iser_mr_t *first_chunk;
+ iser_vmem_mr_pool_t *result;
+
+ ASSERT(chunksize <= max_total_size);
+ result = kmem_zalloc(sizeof (*result), KM_SLEEP);
+ result->ivmp_hca = hca;
+ result->ivmp_mr_flags = arena_mr_flags;
+ result->ivmp_chunksize = chunksize;
+ result->ivmp_max_total_size = max_total_size;
+ mutex_init(&result->ivmp_mutex, NULL, MUTEX_DRIVER, NULL);
+ avl_create(&result->ivmp_mr_list, iser_vmem_mr_compare,
+ sizeof (iser_mr_t), offsetof(iser_mr_t, is_avl_ln));
+
+ first_chunk = iser_vmem_chunk_alloc(hca, chunksize,
+ arena_mr_flags | IBT_MR_SLEEP);
+
+ avl_add(&result->ivmp_mr_list, first_chunk);
+ result->ivmp_total_size += chunksize;
+
+ result->ivmp_vmem = vmem_create(name,
+ (void *)(uintptr_t)first_chunk->is_mrva,
+ (size_t)first_chunk->is_mrlen, ISER_MR_QUANTSIZE,
+ NULL, NULL, NULL, 0, VM_SLEEP);
+
+ return (result);
+}
+
+void
+iser_vmem_destroy(iser_vmem_mr_pool_t *vmr_pool)
+{
+ iser_mr_t *chunk, *next_chunk;
+
+ mutex_enter(&vmr_pool->ivmp_mutex);
+ vmem_destroy(vmr_pool->ivmp_vmem);
+
+ for (chunk = avl_first(&vmr_pool->ivmp_mr_list); chunk != NULL;
+ chunk = next_chunk) {
+ next_chunk = AVL_NEXT(&vmr_pool->ivmp_mr_list, chunk);
+ avl_remove(&vmr_pool->ivmp_mr_list, chunk);
+ iser_vmem_chunk_free(vmr_pool->ivmp_hca, chunk);
+ }
+ mutex_exit(&vmr_pool->ivmp_mutex);
+
+ avl_destroy(&vmr_pool->ivmp_mr_list);
+ mutex_destroy(&vmr_pool->ivmp_mutex);
+
+ kmem_free(vmr_pool, sizeof (*vmr_pool));
+}
+
+void *
+iser_vmem_alloc(iser_vmem_mr_pool_t *vmr_pool, size_t size)
+{
+ void *result;
+ iser_mr_t *next_chunk;
+ ib_memlen_t chunk_len;
+ result = vmem_alloc(vmr_pool->ivmp_vmem, size,
+ VM_NOSLEEP | VM_FIRSTFIT);
+ if (result == NULL) {
+ mutex_enter(&vmr_pool->ivmp_mutex);
+ chunk_len = vmr_pool->ivmp_chunksize;
+ if ((vmr_pool->ivmp_total_size + chunk_len) >
+ vmr_pool->ivmp_max_total_size) {
+ /*
+ * Don't go over the pool size limit. We can allocate
+ * partial chunks so it's not always the case that
+ * current_size + chunk_size == max_total_size
+ */
+ if (vmr_pool->ivmp_total_size >=
+ vmr_pool->ivmp_max_total_size) {
+ mutex_exit(&vmr_pool->ivmp_mutex);
+ return (NULL);
+ } else {
+ chunk_len = vmr_pool->ivmp_max_total_size -
+ vmr_pool->ivmp_total_size;
+ }
+ }
+ next_chunk = iser_vmem_chunk_alloc(vmr_pool->ivmp_hca,
+ chunk_len, vmr_pool->ivmp_mr_flags | IBT_MR_NOSLEEP);
+ if (next_chunk != NULL) {
+ if (vmem_add(vmr_pool->ivmp_vmem,
+ (void *)(uintptr_t)next_chunk->is_mrva,
+ next_chunk->is_mrlen, VM_NOSLEEP) == NULL) {
+ /* Free the chunk we just allocated */
+ iser_vmem_chunk_free(vmr_pool->ivmp_hca,
+ next_chunk);
+ } else {
+ vmr_pool->ivmp_total_size +=
+ next_chunk->is_mrlen;
+ avl_add(&vmr_pool->ivmp_mr_list, next_chunk);
+ }
+
+ result = vmem_alloc(vmr_pool->ivmp_vmem, size,
+ VM_NOSLEEP | VM_FIRSTFIT);
+ }
+
+ mutex_exit(&vmr_pool->ivmp_mutex);
+ }
+
+ return (result);
+}
+
+
+void
+iser_vmem_free(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size)
+{
+ vmem_free(vmr_pool->ivmp_vmem, vaddr, size);
+}
+
+idm_status_t
+iser_vmem_mr(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size,
+ iser_mr_t *mr)
+{
+ avl_index_t where;
+ ib_vaddr_t mrva = (ib_vaddr_t)(uintptr_t)vaddr;
+ iser_mr_t search_chunk;
+ iser_mr_t *nearest_chunk;
+ ib_vaddr_t chunk_end;
+
+ mutex_enter(&vmr_pool->ivmp_mutex);
+ search_chunk.is_mrva = mrva;
+ nearest_chunk = avl_find(&vmr_pool->ivmp_mr_list, &search_chunk,
+ &where);
+ if (nearest_chunk == NULL) {
+ nearest_chunk = avl_nearest(&vmr_pool->ivmp_mr_list, where,
+ AVL_BEFORE);
+ if (nearest_chunk == NULL) {
+ mutex_exit(&vmr_pool->ivmp_mutex);
+ return (IDM_STATUS_FAIL);
+ }
+ }
+
+ /* See if this chunk contains the specified address range */
+ ASSERT(nearest_chunk->is_mrva <= mrva);
+ chunk_end = nearest_chunk->is_mrva + nearest_chunk->is_mrlen;
+ if (chunk_end >= mrva + size) {
+ /* Yes, this chunk contains the address range */
+ mr->is_mrhdl = nearest_chunk->is_mrhdl;
+ mr->is_mrva = mrva;
+ mr->is_mrlen = size;
+ mr->is_mrlkey = nearest_chunk->is_mrlkey;
+ mr->is_mrrkey = nearest_chunk->is_mrrkey;
+ mutex_exit(&vmr_pool->ivmp_mutex);
+ return (IDM_STATUS_SUCCESS);
+ }
+ mutex_exit(&vmr_pool->ivmp_mutex);
+
+ return (IDM_STATUS_FAIL);
+}
+
+static iser_mr_t *
+iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
+ ibt_mr_flags_t mr_flags)
+{
+ void *chunk = NULL;
+ iser_mr_t *result = NULL;
+ int km_flags = 0;
+
+ if (mr_flags & IBT_MR_NOSLEEP)
+ km_flags |= KM_NOSLEEP;
+
+ while ((chunk == NULL) && (chunksize >= ISER_MIN_CHUNKSIZE)) {
+ chunk = kmem_alloc(chunksize, km_flags);
+ if (chunk == NULL) {
+ ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
+ "chunk alloc of %d failed, trying %d",
+ (int)chunksize, (int)(chunksize / 2));
+ chunksize /= 2;
+ } else {
+ ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
+ "New chunk %p size %d", chunk, (int)chunksize);
+ }
+ }
+
+ if (chunk != NULL) {
+ result = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)chunk,
+ chunksize, mr_flags);
+ if (result == NULL) {
+ ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
+ "Chunk registration failed");
+ kmem_free(chunk, chunksize);
+ }
+ }
+
+ return (result);
+}
+
+static void
+iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr)
+{
+ void *chunk = (void *)(uintptr_t)iser_mr->is_mrva;
+ ib_memlen_t chunksize = iser_mr->is_mrlen;
+
+ iser_dereg_mem(hca, iser_mr);
+
+ kmem_free(chunk, chunksize);
+}
+
+/* Register a memory range with the HCA; returns NULL on any failure */
+iser_mr_t *
+iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr, ib_memlen_t len,
+ ibt_mr_flags_t mr_flags)
+{
+ iser_mr_t *result = NULL;
+ ibt_mr_attr_t mr_attr;
+ ibt_mr_desc_t mr_desc;
+ ibt_status_t status;
+ int km_flags = 0;
+
+ /* Honor the caller's no-sleep request when allocating the handle */
+ if (mr_flags & IBT_MR_NOSLEEP)
+ km_flags |= KM_NOSLEEP;
+
+ result = (iser_mr_t *)kmem_zalloc(sizeof (iser_mr_t), km_flags);
+ if (result == NULL) {
+ ISER_LOG(CE_NOTE, "iser_reg_mem: failed to allocate "
+ "memory for iser_mr handle");
+ return (NULL);
+ }
+
+ bzero(&mr_attr, sizeof (ibt_mr_attr_t));
+ bzero(&mr_desc, sizeof (ibt_mr_desc_t));
+ mr_attr.mr_vaddr = vaddr;
+ mr_attr.mr_len = len;
+ mr_attr.mr_as = NULL;
+ mr_attr.mr_flags = mr_flags;
+
+ status = ibt_register_mr(hca->hca_hdl, hca->hca_pdhdl, &mr_attr,
+ &result->is_mrhdl, &mr_desc);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_reg_mem: ibt_register_mr "
+ "failure (%d)", status);
+ kmem_free(result, sizeof (iser_mr_t));
+ return (NULL);
+ }
+ result->is_mrva = mr_attr.mr_vaddr;
+ result->is_mrlen = mr_attr.mr_len;
+ result->is_mrlkey = mr_desc.md_lkey;
+ result->is_mrrkey = mr_desc.md_rkey;
+
+ return (result);
+}
+
+void
+iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr)
+{
+ ibt_deregister_mr(hca->hca_hdl, mr->is_mrhdl);
+ kmem_free(mr, sizeof (iser_mr_t));
+}
+
+static int
+iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2)
+{
+ iser_mr_t *mr1 = (iser_mr_t *)void_mr1;
+ iser_mr_t *mr2 = (iser_mr_t *)void_mr2;
+
+ /* Sort memory chunks by their virtual address */
+ if (mr1->is_mrva < mr2->is_mrva)
+ return (-1);
+ else if (mr1->is_mrva > mr2->is_mrva)
+ return (1);
+
+ return (0);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/iser/iser_xfer.c Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,524 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/byteorder.h>
+
+#include <sys/ib/clients/iser/iser.h>
+
+/*
+ * iser_xfer.c
+ */
+
+/* Post an iSER Hello message declaring our parameters to the target */
+int
+iser_xfer_hello_msg(iser_chan_t *chan)
+{
+ iser_hca_t *hca;
+ iser_wr_t *iser_wr;
+ iser_msg_t *msg;
+ ibt_send_wr_t wr;
+ iser_hello_hdr_t *hdr;
+ int status;
+
+ ASSERT(chan != NULL);
+
+ hca = (iser_hca_t *)chan->ic_hca;
+ if (hca == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_hello_msg: no hca handle found");
+ return (ISER_STATUS_FAIL);
+ }
+
+ msg = iser_msg_get(hca, 1, NULL);
+ if (msg == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_hello_msg: iser message cache "
+ "alloc failed");
+ return (ISER_STATUS_FAIL);
+ }
+
+ /* Send iSER Hello Message to declare iSER parameters to the target */
+ hdr = (iser_hello_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
+ hdr->opcode = ISER_OPCODE_HELLO_MSG;
+ hdr->rsvd1 = 0;
+ hdr->maxver = 1;
+ hdr->minver = 1;
+ hdr->iser_ird = htons(ISER_IB_DEFAULT_IRD);
+ hdr->rsvd2[0] = 0;
+ hdr->rsvd2[1] = 0;
+
+ /* Allocate an iSER WR handle and tuck this msg into it */
+ iser_wr = iser_wr_get();
+ if (iser_wr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_hello_msg: unable to allocate "
+ "iser wr handle");
+ iser_msg_free(msg);
+ return (ISER_STATUS_FAIL);
+ }
+ iser_wr->iw_msg = msg;
+ iser_wr->iw_type = ISER_WR_SEND;
+
+ /* Zero the WR so fields we never set (e.g. wr_flags) are well-defined */
+ bzero(&wr, sizeof (wr));
+
+ /* WRID is the address of our generic iser_wr handle; fill in the rest */
+ wr.wr_id = (ibt_wrid_t)(uintptr_t)iser_wr;
+ wr.wr_trans = IBT_RC_SRV;
+ wr.wr_opcode = IBT_WRC_SEND;
+ wr.wr_nds = 1;
+ wr.wr_sgl = &msg->msg_ds;
+
+ status = ibt_post_send(chan->ic_chanhdl, &wr, 1, NULL);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_xfer_hello_msg: ibt_post_send "
+ "failure (%d)", status);
+ iser_msg_free(msg);
+ iser_wr_free(iser_wr);
+ return (ISER_STATUS_FAIL);
+ }
+ /* Increment this channel's SQ posted count */
+ mutex_enter(&chan->ic_sq_post_lock);
+ chan->ic_sq_post_count++;
+ if (chan->ic_sq_post_count > chan->ic_sq_max_post_count)
+ chan->ic_sq_max_post_count = chan->ic_sq_post_count;
+ mutex_exit(&chan->ic_sq_post_lock);
+
+ ISER_LOG(CE_NOTE, "Posting iSER Hello message: chan (0x%p): "
+ "IP [%x to %x]", (void *)chan, chan->ic_localip.un.ip4addr,
+ chan->ic_remoteip.un.ip4addr);
+
+ return (ISER_STATUS_SUCCESS);
+}
+
+/* Post an iSER HelloReply message on the given channel */
+int
+iser_xfer_helloreply_msg(iser_chan_t *chan)
+{
+ iser_hca_t *hca;
+ iser_wr_t *iser_wr;
+ ibt_send_wr_t wr;
+ iser_msg_t *msg;
+ iser_helloreply_hdr_t *hdr;
+ int status;
+
+ ASSERT(chan != NULL);
+
+ hca = (iser_hca_t *)chan->ic_hca;
+ if (hca == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_helloreply_msg: no hca handle "
+ "found");
+ return (ISER_STATUS_FAIL);
+ }
+
+ msg = iser_msg_get(hca, 1, NULL);
+ if (msg == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_helloreply_msg: iser message "
+ "cache alloc failed");
+ return (ISER_STATUS_FAIL);
+ }
+
+ /* Use the iSER Hello Reply Message */
+ hdr = (iser_helloreply_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
+ hdr->opcode = ISER_OPCODE_HELLOREPLY_MSG;
+ hdr->rsvd1 = 0;
+ hdr->flag = 0;
+ hdr->maxver = 1;
+ hdr->curver = 1;
+ hdr->iser_ord = htons(ISER_IB_DEFAULT_ORD);
+ hdr->rsvd2[0] = 0;
+ hdr->rsvd2[1] = 0;
+
+ /* Allocate an iSER WR handle and tuck this msg into it */
+ iser_wr = iser_wr_get();
+ if (iser_wr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_helloreply_msg: unable to "
+ "allocate iser wr handle");
+ iser_msg_free(msg);
+ return (ISER_STATUS_FAIL);
+ }
+ iser_wr->iw_msg = msg;
+ iser_wr->iw_type = ISER_WR_SEND;
+
+ /* Zero the WR so fields we never set (e.g. wr_flags) are well-defined */
+ bzero(&wr, sizeof (wr));
+
+ /* WRID is the address of our generic iser_wr handle; fill in the rest */
+ wr.wr_id = (ibt_wrid_t)(uintptr_t)iser_wr;
+ wr.wr_trans = IBT_RC_SRV;
+ wr.wr_opcode = IBT_WRC_SEND;
+ wr.wr_nds = 1;
+ wr.wr_sgl = &msg->msg_ds;
+
+ status = ibt_post_send(chan->ic_chanhdl, &wr, 1, NULL);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_xfer_helloreply_msg: ibt_post_send "
+ "failure (%d)", status);
+ iser_msg_free(msg);
+ iser_wr_free(iser_wr);
+ return (ISER_STATUS_FAIL);
+ }
+ /* Increment this channel's SQ posted count */
+ mutex_enter(&chan->ic_sq_post_lock);
+ chan->ic_sq_post_count++;
+ if (chan->ic_sq_post_count > chan->ic_sq_max_post_count)
+ chan->ic_sq_max_post_count = chan->ic_sq_post_count;
+ mutex_exit(&chan->ic_sq_post_lock);
+
+ ISER_LOG(CE_NOTE, "Posting iSER HelloReply message: chan (0x%p): "
+ "IP [%x to %x]", (void *)chan, chan->ic_localip.un.ip4addr,
+ chan->ic_remoteip.un.ip4addr);
+
+ return (ISER_STATUS_SUCCESS);
+}
+
+/*
+ * iser_xfer_ctrlpdu
+ *
+ * This is iSER's implementation of the 'Send_control' operational primitive.
+ * This iSER layer uses the Send Message type of RCaP to transfer the iSCSI
+ * Control-type PDU. A special case is that the transfer of SCSI Data-out PDUs
+ * carrying unsolicited data are also treated as iSCSI Control-Type PDUs. The
+ * message payload contains an iSER header followed by the iSCSI Control-type
+ * PDU itself.
+ * This function is invoked by an initiator iSCSI layer requesting the transfer
+ * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
+ * iSCSI response PDU.
+ */
+int
+iser_xfer_ctrlpdu(iser_chan_t *chan, idm_pdu_t *pdu)
+{
+ iser_hca_t *hca;
+ iser_ctrl_hdr_t *hdr;
+ iser_msg_t *msg;
+ iser_wr_t *iser_wr;
+ ibt_send_wr_t wr;
+ int status;
+ iser_mr_t *mr;
+ iscsi_data_hdr_t *bhs;
+ idm_conn_t *ic;
+ idm_task_t *idt = NULL;
+ idm_buf_t *buf;
+
+ ASSERT(chan != NULL);
+
+ /*
+ * All SCSI command PDU (except SCSI Read and SCSI Write) and the SCSI
+ * Response PDU are sent to the remote end using the SendSE Message.
+ *
+ * Setup a Send Message for carrying the iSCSI control-type PDU
+ * preceeded by an iSER header.
+ */
+ hca = (iser_hca_t *)chan->ic_hca;
+ if (hca == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_ctrlpdu: no hca handle found");
+ return (ISER_STATUS_FAIL);
+ }
+
+ msg = iser_msg_get(hca, 1, NULL);
+ if (msg == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_ctrlpdu: iser message cache "
+ "alloc failed");
+ return (ISER_STATUS_FAIL);
+ }
+
+ /* Pull the BHS out of the PDU handle */
+ bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
+
+ ASSERT(chan->ic_conn != NULL && chan->ic_conn->ic_idmc != NULL);
+ ic = chan->ic_conn->ic_idmc;
+ ASSERT(ic != NULL);
+
+ hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
+
+ /*
+ * Initialize header assuming no transfers
+ */
+ bzero(hdr, sizeof (*hdr));
+ hdr->opcode = ISER_OPCODE_CTRL_TYPE_PDU;
+
+ /*
+ * On the initiator side, the task buffers will be used to identify
+ * if there are any buffers to be advertised
+ */
+ if ((ic->ic_conn_type == CONN_TYPE_INI) &&
+ ((bhs->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_CMD) &&
+ ((idt = idm_task_find(ic, bhs->itt, bhs->ttt)) != NULL)) {
+
+ if (!list_is_empty(&idt->idt_inbufv)) {
+ buf = idm_buf_find(&idt->idt_inbufv, 0);
+ ASSERT(buf != NULL);
+
+ mr = (iser_mr_t *)buf->idb_reg_private;
+ ASSERT(mr != NULL);
+
+ hdr->rsv_flag = 1;
+ hdr->rstag = htonl(mr->is_mrrkey);
+ BE_OUT64(&hdr->rva, mr->is_mrva);
+ }
+
+ if (!list_is_empty(&idt->idt_outbufv)) {
+ buf = idm_buf_find(&idt->idt_outbufv, 0);
+ ASSERT(buf != NULL);
+
+ mr = (iser_mr_t *)buf->idb_reg_private;
+ ASSERT(mr != NULL);
+
+ hdr->wsv_flag = 1;
+ hdr->wstag = htonl(mr->is_mrrkey);
+ BE_OUT64(&hdr->wva, mr->is_mrva);
+ }
+
+ /* Release our reference on the task */
+ idm_task_rele(idt);
+ }
+
+ /* Copy the BHS after the iSER header */
+ bcopy(pdu->isp_hdr,
+ (uint8_t *)(uintptr_t)msg->msg_ds.ds_va + ISER_HEADER_LENGTH,
+ pdu->isp_hdrlen);
+
+ if (pdu->isp_datalen > 0) {
+ /* Copy the isp_data after the PDU header */
+ bcopy(pdu->isp_data,
+ (uint8_t *)(uintptr_t)msg->msg_ds.ds_va +
+ ISER_HEADER_LENGTH + pdu->isp_hdrlen,
+ pdu->isp_datalen);
+
+ /* Set the SGE's ds_len */
+ msg->msg_ds.ds_len = ISER_HEADER_LENGTH + pdu->isp_hdrlen +
+ pdu->isp_datalen;
+ } else {
+ /* No data, so set the SGE's ds_len to the headers length */
+ msg->msg_ds.ds_len = ISER_HEADER_LENGTH + pdu->isp_hdrlen;
+ }
+
+ /*
+ * Build Work Request to be posted on the Send Queue.
+ */
+ bzero(&wr, sizeof (wr));
+
+ /* Allocate an iSER WR handle and tuck the msg and pdu into it */
+ iser_wr = iser_wr_get();
+ if (iser_wr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_ctrlpdu: unable to allocate "
+ "iser wr handle");
+ iser_msg_free(msg);
+ return (ISER_STATUS_FAIL);
+ }
+ iser_wr->iw_pdu = pdu;
+ iser_wr->iw_msg = msg;
+ iser_wr->iw_type = ISER_WR_SEND;
+
+ /*
+ * Use the address of our generic iser_wr handle as our WRID
+ * and populate the rest of the work request
+ */
+ wr.wr_id = (ibt_wrid_t)(uintptr_t)iser_wr;
+ wr.wr_trans = IBT_RC_SRV;
+ wr.wr_opcode = IBT_WRC_SEND;
+ wr.wr_nds = 1;
+ wr.wr_sgl = &msg->msg_ds;
+
+ /* Post Send Work Request on the specified channel */
+ status = ibt_post_send(chan->ic_chanhdl, &wr, 1, NULL);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_xfer_ctrlpdu: ibt_post_send "
+ "failure (%d)", status);
+ iser_msg_free(msg);
+ iser_wr_free(iser_wr);
+ return (ISER_STATUS_FAIL);
+ }
+ /* Increment this channel's SQ posted count */
+ mutex_enter(&chan->ic_sq_post_lock);
+ chan->ic_sq_post_count++;
+ if (chan->ic_sq_post_count > chan->ic_sq_max_post_count)
+ chan->ic_sq_max_post_count = chan->ic_sq_post_count;
+ mutex_exit(&chan->ic_sq_post_lock);
+
+ return (ISER_STATUS_SUCCESS);
+}
+
+/*
+ * iser_xfer_buf_to_ini
+ * This is iSER's implementation of the 'Put_Data' operational primitive.
+ * The iSCSI layer at the target invokes this function when it is ready to
+ * return the SCSI Read Data to the initiator. This function generates and
+ * sends an RDMA Write Message containing the read data to the initiator.
+ */
+int
+iser_xfer_buf_to_ini(idm_task_t *idt, idm_buf_t *buf)
+{
+ iser_conn_t *iser_conn;
+ iser_chan_t *iser_chan;
+ iser_buf_t *iser_buf;
+ iser_wr_t *iser_wr;
+ iser_ctrl_hdr_t *iser_hdr;
+ ibt_send_wr_t wr;
+ uint64_t reg_raddr;
+ uint32_t reg_rkey;
+ int status;
+
+ /* Grab the iSER resources from the task and buf handles */
+ iser_conn = (iser_conn_t *)idt->idt_ic->ic_transport_private;
+ iser_chan = iser_conn->ic_chan;
+ iser_buf = (iser_buf_t *)buf->idb_buf_private;
+ iser_hdr = (iser_ctrl_hdr_t *)idt->idt_transport_hdr;
+
+ /* Pull the Read STag data out of the iSER header in the task hdl */
+ reg_raddr = BE_IN64(&iser_hdr->rva);
+ reg_rkey = (ntohl(iser_hdr->rstag));
+
+ /* Set up the WR raddr and rkey based upon the Read iSER STag */
+ bzero(&wr, sizeof (ibt_send_wr_t));
+ wr.wr.rc.rcwr.rdma.rdma_raddr = reg_raddr + buf->idb_bufoffset;
+ wr.wr.rc.rcwr.rdma.rdma_rkey = reg_rkey;
+
+ /* Set the transfer length from the IDM buf handle */
+ iser_buf->buf_ds.ds_len = buf->idb_xfer_len;
+
+ /* Allocate an iSER WR handle and tuck the IDM buf handle into it */
+ iser_wr = iser_wr_get();
+ if (iser_wr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_buf_to_ini: unable to allocate "
+ "iser wr handle");
+ return (ISER_STATUS_FAIL);
+ }
+ iser_wr->iw_buf = buf;
+ iser_wr->iw_type = ISER_WR_RDMAW;
+
+ /* Use the address of our generic iser_wr handle as our WRID */
+ wr.wr_id = (ibt_wrid_t)(uintptr_t)iser_wr;
+
+ /* Populate the rest of the work request */
+ wr.wr_flags = IBT_WR_SEND_SIGNAL;
+ wr.wr_trans = IBT_RC_SRV;
+ wr.wr_opcode = IBT_WRC_RDMAW;
+ wr.wr_nds = 1;
+ wr.wr_sgl = &iser_buf->buf_ds;
+
+#ifdef DEBUG
+ bcopy(&wr, &iser_buf->buf_wr, sizeof (ibt_send_wr_t));
+#endif
+
+ status = ibt_post_send(iser_chan->ic_chanhdl, &wr, 1, NULL);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_xfer_buf_to_ini: ibt_post_send "
+ "failure (%d)", status);
+ iser_wr_free(iser_wr);
+ return (ISER_STATUS_FAIL);
+ }
+ /* Increment this channel's SQ posted count */
+ mutex_enter(&iser_chan->ic_sq_post_lock);
+ iser_chan->ic_sq_post_count++;
+ if (iser_chan->ic_sq_post_count > iser_chan->ic_sq_max_post_count)
+ iser_chan->ic_sq_max_post_count = iser_chan->ic_sq_post_count;
+ mutex_exit(&iser_chan->ic_sq_post_lock);
+
+ return (ISER_STATUS_SUCCESS);
+}
+
+/*
+ * iser_xfer_buf_from_ini
+ * This is iSER's implementation of the 'Get_Data' operational primitive.
+ * The iSCSI layer at the target invokes this function when it is ready to
+ * receive the SCSI Write Data from the initiator. This function generates and
+ * sends an RDMA Read Message to get the data from the initiator. No R2T PDUs
+ * are generated.
+ */
+int
+iser_xfer_buf_from_ini(idm_task_t *idt, idm_buf_t *buf)
+{
+ iser_conn_t *iser_conn;
+ iser_chan_t *iser_chan;
+ iser_buf_t *iser_buf;
+ iser_wr_t *iser_wr;
+ iser_ctrl_hdr_t *iser_hdr;
+ ibt_send_wr_t wr;
+ uint64_t reg_raddr;
+ uint32_t reg_rkey;
+ int status;
+
+ /* Grab the iSER resources from the task and buf handles */
+ iser_conn = (iser_conn_t *)idt->idt_ic->ic_transport_private;
+ iser_chan = iser_conn->ic_chan;
+ iser_buf = (iser_buf_t *)buf->idb_buf_private;
+ iser_hdr = (iser_ctrl_hdr_t *)idt->idt_transport_hdr;
+
+ /* Pull the Write STag data out of the iSER header in the task hdl */
+ reg_raddr = BE_IN64(&iser_hdr->wva);
+ reg_rkey = (ntohl(iser_hdr->wstag));
+
+ /* Set up the WR raddr and rkey based upon the iSER Write STag */
+ bzero(&wr, sizeof (ibt_send_wr_t));
+ wr.wr.rc.rcwr.rdma.rdma_raddr = reg_raddr + buf->idb_bufoffset;
+ wr.wr.rc.rcwr.rdma.rdma_rkey = reg_rkey;
+
+ /* Set the transfer length from the IDM buf handle */
+ iser_buf->buf_ds.ds_len = buf->idb_xfer_len;
+
+ /* Allocate an iSER WR handle and tuck the IDM buf handle into it */
+ iser_wr = iser_wr_get();
+ if (iser_wr == NULL) {
+ ISER_LOG(CE_NOTE, "iser_xfer_buf_from_ini: unable to allocate "
+ "iser wr handle");
+ return (ISER_STATUS_FAIL);
+ }
+ iser_wr->iw_buf = buf;
+ iser_wr->iw_type = ISER_WR_RDMAR;
+
+ /* Use the address of our generic iser_wr handle as our WRID */
+ wr.wr_id = (ibt_wrid_t)(uintptr_t)iser_wr;
+
+ /* Populate the rest of the work request */
+ wr.wr_flags = IBT_WR_SEND_SIGNAL;
+ wr.wr_trans = IBT_RC_SRV;
+ wr.wr_opcode = IBT_WRC_RDMAR;
+ wr.wr_nds = 1;
+ wr.wr_sgl = &iser_buf->buf_ds;
+
+#ifdef DEBUG
+ bcopy(&wr, &iser_buf->buf_wr, sizeof (ibt_send_wr_t));
+#endif
+
+ status = ibt_post_send(iser_chan->ic_chanhdl, &wr, 1, NULL);
+ if (status != IBT_SUCCESS) {
+ ISER_LOG(CE_NOTE, "iser_xfer_buf_from_ini: ibt_post_send "
+ "failure (%d)", status);
+ iser_wr_free(iser_wr);
+ return (ISER_STATUS_FAIL);
+ }
+ /* Increment this channel's SQ posted count */
+ mutex_enter(&iser_chan->ic_sq_post_lock);
+ iser_chan->ic_sq_post_count++;
+ if (iser_chan->ic_sq_post_count > iser_chan->ic_sq_max_post_count)
+ iser_chan->ic_sq_max_post_count = iser_chan->ic_sq_post_count;
+ mutex_exit(&iser_chan->ic_sq_post_lock);
+
+ return (ISER_STATUS_SUCCESS);
+}
--- a/usr/src/uts/common/io/idm/idm.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/idm/idm.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -66,9 +66,16 @@
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
+static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
+ int sleepflag);
boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
+#ifdef DEBUG
+boolean_t idm_pattern_checking = 1;
+#else
+boolean_t idm_pattern_checking = 0;
+#endif
/*
* Potential tuneable for the maximum number of tasks. Default to
@@ -228,14 +235,34 @@
*
*/
void
+idm_ini_conn_destroy_task(void *ic_void)
+{
+ idm_conn_t *ic = ic_void;
+
+ ic->ic_transport_ops->it_ini_conn_destroy(ic);
+ idm_conn_destroy_common(ic);
+}
+
+void
idm_ini_conn_destroy(idm_conn_t *ic)
{
+ /*
+ * It's reasonable for the initiator to call idm_ini_conn_destroy
+ * from within the context of the CN_CONNECT_DESTROY notification.
+ * That's a problem since we want to destroy the taskq for the
+ * state machine associated with the connection. Remove the
+ * connection from the list right away then handle the remaining
+ * work via the idm_global_taskq.
+ */
mutex_enter(&idm.idm_global_mutex);
list_remove(&idm.idm_ini_conn_list, ic);
mutex_exit(&idm.idm_global_mutex);
- ic->ic_transport_ops->it_ini_conn_destroy(ic);
- idm_conn_destroy_common(ic);
+ if (taskq_dispatch(idm.idm_global_taskq,
+ &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
+ cmn_err(CE_WARN,
+ "idm_ini_conn_destroy: Couldn't dispatch task");
+ }
}
/*
@@ -243,24 +270,31 @@
*
* Establish connection to the remote system identified in idm_conn_t.
* The connection parameters including the remote IP address were established
- * in the call to idm_ini_conn_create.
+ * in the call to idm_ini_conn_create. The IDM state machine will
+ * perform client notifications as necessary to prompt the initiator through
+ * the login process. IDM also keeps a timer running so that if the login
+ * process doesn't complete in a timely manner it will fail.
*
* ic - idm_conn_t structure representing the relevant connection
*
* Returns success if the connection was established, otherwise some kind
* of meaningful error code.
*
- * Upon return the initiator can send a "login" request when it is ready.
+ * Upon return the login has either failed or is logging in (ffp)
*/
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
- idm_status_t rc;
+ idm_status_t rc = IDM_STATUS_SUCCESS;
rc = idm_conn_sm_init(ic);
if (rc != IDM_STATUS_SUCCESS) {
return (ic->ic_conn_sm_status);
}
+
+ /* Hold connection until we return */
+ idm_conn_hold(ic);
+
/* Kick state machine */
idm_conn_event(ic, CE_CONNECT_REQ, NULL);
@@ -274,6 +308,7 @@
if (ic->ic_state_flags & CF_ERROR) {
/* ic->ic_conn_sm_status will contains failure status */
+ idm_conn_rele(ic);
return (ic->ic_conn_sm_status);
}
@@ -281,21 +316,9 @@
ASSERT(ic->ic_state_flags & CF_LOGIN_READY);
(void) idm_notify_client(ic, CN_READY_FOR_LOGIN, NULL);
- return (IDM_STATUS_SUCCESS);
-}
+ idm_conn_rele(ic);
-/*
- * idm_ini_conn_sm_fini_task()
- *
- * Dispatch a thread on the global taskq to tear down an initiator connection's
- * state machine. Note: We cannot do this from the disconnect thread as we will
- * end up in a situation wherein the thread is running on a taskq that it then
- * attempts to destroy.
- */
-static void
-idm_ini_conn_sm_fini_task(void *ic_void)
-{
- idm_conn_sm_fini((idm_conn_t *)ic_void);
+ return (rc);
}
/*
@@ -306,30 +329,38 @@
*
* ic - idm_conn_t structure representing the relevant connection
*
- * This is synchronous and it will return when the connection has been
- * properly shutdown.
+ * This is asynchronous and will return before the connection is properly
+ * shutdown
*/
/* ARGSUSED */
void
idm_ini_conn_disconnect(idm_conn_t *ic)
{
- mutex_enter(&ic->ic_state_mutex);
+ idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
+}
- if (ic->ic_state_flags == 0) {
- /* already disconnected */
- mutex_exit(&ic->ic_state_mutex);
- return;
+/*
+ * idm_ini_conn_disconnect_sync
+ *
+ * Forces a connection (previously established using idm_ini_conn_connect)
+ * to perform a controlled shutdown. Blocks until the connection is
+ * disconnected.
+ *
+ * ic - idm_conn_t structure representing the relevant connection
+ */
+/* ARGSUSED */
+void
+idm_ini_conn_disconnect_sync(idm_conn_t *ic)
+{
+ mutex_enter(&ic->ic_state_mutex);
+ if ((ic->ic_state != CS_S9_INIT_ERROR) &&
+ (ic->ic_state != CS_S11_COMPLETE)) {
+ idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
+ while ((ic->ic_state != CS_S9_INIT_ERROR) &&
+ (ic->ic_state != CS_S11_COMPLETE))
+ cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
}
- ic->ic_state_flags = 0;
- ic->ic_conn_sm_status = 0;
mutex_exit(&ic->ic_state_mutex);
-
- /* invoke the transport-specific conn_destroy */
- (void) ic->ic_transport_ops->it_ini_conn_disconnect(ic);
-
- /* teardown the connection sm */
- (void) taskq_dispatch(idm.idm_global_taskq, &idm_ini_conn_sm_fini_task,
- (void *)ic, TQ_SLEEP);
}
/*
@@ -425,13 +456,6 @@
cv_broadcast(&idm.idm_tgt_svc_cv);
mutex_exit(&idm.idm_global_mutex);
- /* tear down the svc resources */
- idm_refcnt_destroy(&is->is_refcnt);
- cv_destroy(&is->is_count_cv);
- mutex_destroy(&is->is_count_mutex);
- cv_destroy(&is->is_cv);
- mutex_destroy(&is->is_mutex);
-
/* teardown each transport-specific service */
for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
it = &idm_transport_list[type];
@@ -442,6 +466,13 @@
it->it_ops->it_tgt_svc_destroy(is);
}
+ /* tear down the svc resources */
+ idm_refcnt_destroy(&is->is_refcnt);
+ cv_destroy(&is->is_count_cv);
+ mutex_destroy(&is->is_count_mutex);
+ cv_destroy(&is->is_cv);
+ mutex_destroy(&is->is_mutex);
+
/* free the svc handle */
kmem_free(is, sizeof (idm_svc_t));
}
@@ -475,15 +506,13 @@
idm_tgt_svc_online(idm_svc_t *is)
{
- idm_transport_type_t type;
+ idm_transport_type_t type, last_type;
idm_transport_t *it;
- int rc;
- int svc_found;
+ int rc = IDM_STATUS_SUCCESS;
mutex_enter(&is->is_mutex);
- /* Walk through each of the transports and online them */
if (is->is_online == 0) {
- svc_found = 0;
+ /* Walk through each of the transports and online them */
for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
it = &idm_transport_list[type];
if (it->it_ops == NULL) {
@@ -494,19 +523,39 @@
mutex_exit(&is->is_mutex);
rc = it->it_ops->it_tgt_svc_online(is);
mutex_enter(&is->is_mutex);
- if (rc == IDM_STATUS_SUCCESS) {
- /* We have at least one service running. */
- svc_found = 1;
+ if (rc != IDM_STATUS_SUCCESS) {
+ last_type = type;
+ break;
}
}
+ if (rc != IDM_STATUS_SUCCESS) {
+ /*
+ * The last transport failed to online.
+ * Offline any transport onlined above and
+ * do not online the target.
+ */
+ for (type = 0; type < last_type; type++) {
+ it = &idm_transport_list[type];
+ if (it->it_ops == NULL) {
+ /* transport is not registered */
+ continue;
+ }
+
+ mutex_exit(&is->is_mutex);
+ it->it_ops->it_tgt_svc_offline(is);
+ mutex_enter(&is->is_mutex);
+ }
+ } else {
+ /* Target service now online */
+ is->is_online = 1;
+ }
} else {
- svc_found = 1;
+ /* Target service already online, just bump the count */
+ is->is_online++;
}
- if (svc_found)
- is->is_online++;
mutex_exit(&is->is_mutex);
- return (svc_found ? IDM_STATUS_SUCCESS : IDM_STATUS_FAIL);
+ return (rc);
}
/*
@@ -600,12 +649,11 @@
* Passes the set of key value pairs to the transport for activation.
* This will be invoked as the connection is entering full-feature mode.
*/
-idm_status_t
+void
idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
{
ASSERT(ic->ic_transport_ops != NULL);
- return (ic->ic_transport_ops->it_notice_key_values(ic,
- negotiated_nvl));
+ ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
}
/*
@@ -640,6 +688,13 @@
idb->idb_xfer_len = xfer_len;
idb->idb_buf_cb = idb_buf_cb;
idb->idb_cb_arg = cb_arg;
+ gethrestime(&idb->idb_xfer_start);
+
+ /*
+ * Buffer should not contain the pattern. If the pattern is
+ * present then we've been asked to transmit uninitialized data
+ */
+ IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
mutex_enter(&idt->idt_mutex);
switch (idt->idt_state) {
@@ -715,6 +770,7 @@
idb->idb_xfer_len = xfer_len;
idb->idb_buf_cb = idb_buf_cb;
idb->idb_cb_arg = cb_arg;
+ gethrestime(&idb->idb_xfer_start);
/*
* "In" buf list is for "Data In" PDU's, "Out" buf list is for
@@ -766,6 +822,7 @@
idb->idb_in_transport = B_FALSE;
idb->idb_tx_thread = B_FALSE;
idt->idt_tx_to_ini_done++;
+ gethrestime(&idb->idb_xfer_done);
/*
* idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
@@ -827,6 +884,7 @@
ASSERT(mutex_owned(&idt->idt_mutex));
idb->idb_in_transport = B_FALSE;
idt->idt_rx_from_ini_done++;
+ gethrestime(&idb->idb_xfer_done);
/*
* idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
@@ -836,6 +894,14 @@
idm_task_rele(idt);
idb->idb_status = status;
+ if (status == IDM_STATUS_SUCCESS) {
+ /*
+ * Buffer should not contain the pattern. If it does then
+ * we did not get the data from the remote host.
+ */
+ IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
+ }
+
switch (idt->idt_state) {
case TASK_ACTIVE:
idm_buf_unbind_out_locked(idt, idb);
@@ -919,6 +985,8 @@
buf->idb_bufoffset = 0;
buf->idb_xfer_len = 0;
buf->idb_magic = IDM_BUF_MAGIC;
+ buf->idb_in_transport = B_FALSE;
+ buf->idb_bufbcopy = B_FALSE;
/*
* If bufptr is NULL, we have an implicit request to allocate
@@ -945,8 +1013,13 @@
buf->idb_bufalloc = B_TRUE;
} else {
/*
- * Set the passed bufptr into the buf handle, and
- * register the handle with the transport layer.
+ * For large transfers, set the passed bufptr into
+ * the buf handle, and register the handle with the
+ * transport layer. As memory registration with the
+ * transport layer is a time/cpu intensive operation,
+ * for small transfers (up to a pre-defined bcopy
+ * threshold), use pre-registered memory buffers
+ * and bcopy data at the appropriate time.
*/
buf->idb_buf = bufptr;
@@ -956,12 +1029,15 @@
kmem_cache_free(idm.idm_buf_cache, buf);
return (NULL);
}
- /* Ensure bufalloc'd flag is unset */
- buf->idb_bufalloc = B_FALSE;
+ /*
+ * The transport layer is now expected to set the idb_bufalloc
+ * correctly to indicate if resources have been allocated.
+ */
}
+ IDM_BUFPAT_SET(buf);
+
return (buf);
-
}
/*
@@ -1013,6 +1089,14 @@
void
idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
{
+ /*
+ * For small transfers, the iSER transport delegates the IDM
+ * layer to bcopy the SCSI Write data for faster IOPS.
+ */
+ if (buf->idb_bufbcopy == B_TRUE) {
+
+ bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
+ }
mutex_enter(&idt->idt_mutex);
idm_buf_bind_out_locked(idt, buf);
mutex_exit(&idt->idt_mutex);
@@ -1029,6 +1113,14 @@
void
idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
{
+ /*
+ * For small transfers, the iSER transport delegates the IDM
+ * layer to bcopy the SCSI Read data into the read buffer
+ * for faster IOPS.
+ */
+ if (buf->idb_bufbcopy == B_TRUE) {
+ bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
+ }
mutex_enter(&idt->idt_mutex);
idm_buf_unbind_in_locked(idt, buf);
mutex_exit(&idt->idt_mutex);
@@ -1080,6 +1172,66 @@
return (NULL);
}
+void
+idm_bufpat_set(idm_buf_t *idb)
+{
+ idm_bufpat_t *bufpat;
+ int len, i;
+
+ len = idb->idb_buflen;
+ len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
+
+ bufpat = idb->idb_buf;
+ for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
+ bufpat->bufpat_idb = idb;
+ bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
+ bufpat->bufpat_offset = i;
+ bufpat++;
+ }
+}
+
+boolean_t
+idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
+{
+ idm_bufpat_t *bufpat;
+ int len, i;
+
+ len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
+ len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
+ ASSERT(len <= idb->idb_buflen);
+ bufpat = idb->idb_buf;
+
+ /*
+ * Don't check the pattern in buffers that came from outside IDM
+ * (these will be buffers from the initiator that we opted not
+ * to double-buffer)
+ */
+ if (!idb->idb_bufalloc)
+ return (B_FALSE);
+
+ /*
+ * Return true if we find the pattern anywhere in the buffer
+ */
+ for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
+ if (BUFPAT_MATCH(bufpat, idb)) {
+ IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
+ "idb %p bufpat %p "
+ "bufpat_idb=%p bufmagic=%08x offset=%08x",
+ (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
+ bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
+ DTRACE_PROBE2(bufpat__pattern__found,
+ idm_buf_t *, idb, idm_bufpat_t *, bufpat);
+ if (type == BP_CHECK_ASSERT) {
+ ASSERT(0);
+ }
+ return (B_TRUE);
+ }
+ bufpat++;
+ }
+
+ return (B_FALSE);
+}
+
/*
* idm_task_alloc
*
@@ -1123,7 +1275,7 @@
/*
* idm_task_start
*
- * Add the task to an AVL tree to notify IDM about a new task. The caller
+ * Mark the task active and initialize some stats. The caller
* sets up the idm_task_t structure with a prior call to idm_task_alloc().
* The task service does not function as a task/work engine, it is the
* responsibility of the initiator to start the data transfer and free the
@@ -1138,22 +1290,35 @@
idt->idt_state = TASK_ACTIVE;
idt->idt_client_handle = handle;
idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
- idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done = 0;
+ idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
+ idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
}
/*
* idm_task_done
*
- * This function will remove the task from the AVL tree indicating that the
- * task is no longer active.
+ * This function sets the state to indicate that the task is no longer active.
*/
void
idm_task_done(idm_task_t *idt)
{
ASSERT(idt != NULL);
- ASSERT(idt->idt_refcnt.ir_refcnt == 0);
+
+ mutex_enter(&idt->idt_mutex);
+ idt->idt_state = TASK_IDLE;
+ mutex_exit(&idt->idt_mutex);
- idt->idt_state = TASK_IDLE;
+ /*
+ * Although unlikely it is possible for a reference to come in after
+ * the client has decided the task is over but before we've marked
+ * the task idle. One specific unavoidable scenario is the case where
+ * a received PDU with the matching ITT/TTT results in a successful
+ * lookup of this task. We are at the mercy of the remote node in
+ * that case so we need to handle it. Now that the task state
+ * has changed no more references will occur so a simple call to
+ * idm_refcnt_wait_ref should deal with the situation.
+ */
+ idm_refcnt_wait_ref(&idt->idt_refcnt);
idm_refcnt_reset(&idt->idt_refcnt);
}
@@ -1166,11 +1331,14 @@
void
idm_task_free(idm_task_t *idt)
{
- idm_conn_t *ic = idt->idt_ic;
+ idm_conn_t *ic;
ASSERT(idt != NULL);
+ ASSERT(idt->idt_refcnt.ir_refcnt == 0);
ASSERT(idt->idt_state == TASK_IDLE);
+ ic = idt->idt_ic;
+
/*
* It's possible for items to still be in the idt_inbufv list if
* they were added after idm_task_cleanup was called. We rely on
@@ -1190,13 +1358,13 @@
}
/*
- * idm_task_find
- *
- * This function looks up a task by task tag
+ * idm_task_find_common
+ * common code for idm_task_find() and idm_task_find_and_complete()
*/
/*ARGSUSED*/
-idm_task_t *
-idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
+static idm_task_t *
+idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
+ boolean_t complete)
{
uint32_t tt, client_handle;
idm_task_t *idt;
@@ -1224,16 +1392,32 @@
if (idt != NULL) {
mutex_enter(&idt->idt_mutex);
if ((idt->idt_state != TASK_ACTIVE) ||
+ (idt->idt_ic != ic) ||
(IDM_CONN_ISTGT(ic) &&
(idt->idt_client_handle != client_handle))) {
/*
- * Task is aborting, we don't want any more references.
+ * Task doesn't match or task is aborting and
+ * we don't want any more references.
*/
+ if ((idt->idt_ic != ic) &&
+ (idt->idt_state == TASK_ACTIVE) &&
+ (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
+ client_handle)) {
+ IDM_CONN_LOG(CE_WARN,
+ "idm_task_find: wrong connection %p != %p",
+ (void *)ic, (void *)idt->idt_ic);
+ }
mutex_exit(&idt->idt_mutex);
rw_exit(&idm.idm_taskid_table_lock);
return (NULL);
}
idm_task_hold(idt);
+ /*
+ * Set the task state to TASK_COMPLETE so it can no longer
+ * be found or aborted.
+ */
+ if (B_TRUE == complete)
+ idt->idt_state = TASK_COMPLETE;
mutex_exit(&idt->idt_mutex);
}
rw_exit(&idm.idm_taskid_table_lock);
@@ -1242,6 +1426,25 @@
}
/*
+ * This function looks up a task by task tag.
+ */
+idm_task_t *
+idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
+{
+ return (idm_task_find_common(ic, itt, ttt, B_FALSE));
+}
+
+/*
+ * This function looks up a task by task tag. If found, the task state
+ * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
+ */
+idm_task_t *
+idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
+{
+ return (idm_task_find_common(ic, itt, ttt, B_TRUE));
+}
+
+/*
* idm_task_find_by_handle
*
* This function looks up a task by the client-private idt_client_handle.
@@ -1323,15 +1526,21 @@
rw_enter(&idm.idm_taskid_table_lock, RW_READER);
for (idx = 0; idx < idm.idm_taskid_max; idx++) {
task = idm.idm_taskid_table[idx];
- if (task && (task->idt_state != TASK_IDLE) &&
+ if (task == NULL)
+ continue;
+ mutex_enter(&task->idt_mutex);
+ if ((task->idt_state != TASK_IDLE) &&
+ (task->idt_state != TASK_COMPLETE) &&
(task->idt_ic == ic)) {
rw_exit(&idm.idm_taskid_table_lock);
idm_task_abort_one(ic, task, abort_type);
rw_enter(&idm.idm_taskid_table_lock, RW_READER);
- }
+ } else
+ mutex_exit(&task->idt_mutex);
}
rw_exit(&idm.idm_taskid_table_lock);
} else {
+ mutex_enter(&idt->idt_mutex);
idm_task_abort_one(ic, idt, abort_type);
}
}
@@ -1360,11 +1569,15 @@
}
}
+/*
+ * Abort the idm task.
+ * Caller must hold the task mutex, which will be released before return
+ */
static void
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
/* Caller must hold connection mutex */
- mutex_enter(&idt->idt_mutex);
+ ASSERT(mutex_owned(&idt->idt_mutex));
switch (idt->idt_state) {
case TASK_ACTIVE:
switch (abort_type) {
@@ -1689,11 +1902,10 @@
}
/*
- * Allocates a PDU along with memory for header and data.
+ * Common allocation of a PDU along with memory for header and data.
*/
-
-idm_pdu_t *
-idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
+static idm_pdu_t *
+idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
{
idm_pdu_t *result;
@@ -1706,21 +1918,45 @@
* length is assumed to be datalen. isp_hdrlen and isp_datalen
* can be adjusted after the PDU is returned if necessary.
*/
- result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, KM_SLEEP);
- result->isp_flags |= IDM_PDU_ALLOC; /* For idm_pdu_free sanity check */
- result->isp_hdr = (iscsi_hdr_t *)(result + 1); /* Ptr. Arithmetic */
- result->isp_hdrlen = hdrlen;
- result->isp_hdrbuflen = hdrlen;
- result->isp_transport_hdrlen = 0;
- result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
- result->isp_datalen = datalen;
- result->isp_databuflen = datalen;
- result->isp_magic = IDM_PDU_MAGIC;
+ result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
+ if (result != NULL) {
+ /* For idm_pdu_free sanity check */
+ result->isp_flags |= IDM_PDU_ALLOC;
+ /* pointer arithmetic */
+ result->isp_hdr = (iscsi_hdr_t *)(result + 1);
+ result->isp_hdrlen = hdrlen;
+ result->isp_hdrbuflen = hdrlen;
+ result->isp_transport_hdrlen = 0;
+ result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
+ result->isp_datalen = datalen;
+ result->isp_databuflen = datalen;
+ result->isp_magic = IDM_PDU_MAGIC;
+ }
return (result);
}
/*
+ * Typical idm_pdu_alloc invocation, will block for resources.
+ */
+idm_pdu_t *
+idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
+{
+ return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
+}
+
+/*
+ * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
+ * are not available. Needed for transport-layer allocations which may
+ * be invoked in interrupt context.
+ */
+idm_pdu_t *
+idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
+{
+ return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
+}
+
+/*
* Free a PDU previously allocated with idm_pdu_alloc() including any
* header and data space allocated as part of the original request.
* Additional memory regions referenced by subsequent modification of
@@ -2030,8 +2266,12 @@
cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);
+ /*
+ * The maximum allocation needs to be high here since there can be
+ * many concurrent tasks using the global taskq.
+ */
idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
- 4, 4, TASKQ_PREPOPULATE);
+ 128, 16384, TASKQ_PREPOPULATE);
if (idm.idm_global_taskq == NULL) {
cv_destroy(&idm.idm_wd_cv);
cv_destroy(&idm.idm_tgt_svc_cv);
@@ -2115,7 +2355,15 @@
thread_join(idm.idm_wd_thread_did);
idm_idpool_destroy(&idm.idm_conn_id_pool);
+
+ /* Close any LDI handles we have open on transport drivers */
+ mutex_enter(&idm.idm_global_mutex);
+ idm_transport_teardown();
+ mutex_exit(&idm.idm_global_mutex);
+
+ /* Teardown the native sockets transport */
idm_so_fini();
+
list_destroy(&idm.idm_ini_conn_list);
list_destroy(&idm.idm_tgt_conn_list);
list_destroy(&idm.idm_tgt_svc_list);
--- a/usr/src/uts/common/io/idm/idm_conn_sm.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/idm/idm_conn_sm.c Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -34,6 +34,7 @@
#include <sys/sdt.h>
#define IDM_CONN_SM_STRINGS
+#define IDM_CN_NOTIFY_STRINGS
#include <sys/idm/idm.h>
boolean_t idm_sm_logging = B_FALSE;
@@ -41,10 +42,6 @@
extern idm_global_t idm; /* Global state */
static void
-idm_conn_event_locked(idm_conn_t *ic, idm_conn_event_t event,
- uintptr_t event_info, idm_pdu_event_type_t pdu_event_type);
-
-static void
idm_conn_event_handler(void *event_ctx_opaque);
static void
@@ -122,7 +119,7 @@
(void) snprintf(taskq_name, sizeof (taskq_name) - 1, "conn_sm%08x",
ic->ic_internal_cid);
- ic->ic_state_taskq = taskq_create(taskq_name, 1, minclsyspri, 2, 2,
+ ic->ic_state_taskq = taskq_create(taskq_name, 1, minclsyspri, 4, 16384,
TASKQ_PREPOPULATE);
if (ic->ic_state_taskq == NULL) {
return (IDM_STATUS_FAIL);
@@ -161,6 +158,7 @@
mutex_exit(&ic->ic_state_mutex);
}
+
idm_status_t
idm_conn_reinstate_event(idm_conn_t *old_ic, idm_conn_t *new_ic)
{
@@ -201,7 +199,7 @@
idm_conn_event_locked(ic, event, event_info, CT_RX_PDU);
}
-static void
+void
idm_conn_event_locked(idm_conn_t *ic, idm_conn_event_t event,
uintptr_t event_info, idm_pdu_event_type_t pdu_event_type)
{
@@ -263,7 +261,7 @@
(void *)ic, idm_ce_name[event_ctx->iec_event],
event_ctx->iec_event);
DTRACE_PROBE2(conn__event,
- idm_conn_t *, ic, smb_event_ctx_t *, event_ctx);
+ idm_conn_t *, ic, idm_conn_event_ctx_t *, event_ctx);
/*
* Validate event
@@ -285,6 +283,7 @@
* CE_TX_PROTOCOL_ERROR and CE_RX_PROTOCOL_ERROR events since
* no PDU's can be transmitted or received in that state.
*/
+ event_ctx->iec_pdu_forwarded = B_FALSE;
if (event_ctx->iec_pdu_event_type != CT_NONE) {
ASSERT(pdu != NULL);
action = idm_conn_sm_validate_pdu(ic, event_ctx, pdu);
@@ -378,10 +377,13 @@
idm_pdu_rx_protocol_error(ic, pdu);
break;
case CA_FORWARD:
- if (event_ctx->iec_pdu_event_type == CT_RX_PDU) {
- idm_pdu_rx_forward(ic, pdu);
- } else {
- idm_pdu_tx_forward(ic, pdu);
+ if (!event_ctx->iec_pdu_forwarded) {
+ if (event_ctx->iec_pdu_event_type ==
+ CT_RX_PDU) {
+ idm_pdu_rx_forward(ic, pdu);
+ } else {
+ idm_pdu_tx_forward(ic, pdu);
+ }
}
break;
default:
@@ -553,6 +555,15 @@
idm_state_s4_in_login_fail_snd_done;
break;
case CE_LOGIN_FAIL_RCV:
+ /*
+ * Need to deliver this PDU to the initiator now because after
+ * we update the state to CS_S9_INIT_ERROR the initiator will
+ * no longer be in an appropriate state.
+ */
+ event_ctx->iec_pdu_forwarded = B_TRUE;
+ pdu = (idm_pdu_t *)event_ctx->iec_info;
+ idm_pdu_rx_forward(ic, pdu);
+ /* FALLTHROUGH */
case CE_TRANSPORT_FAIL:
case CE_LOGOUT_OTHER_CONN_SND:
case CE_LOGOUT_OTHER_CONN_RCV:
@@ -715,6 +726,15 @@
}
break;
case CE_LOGOUT_SUCCESS_RCV:
+ /*
+ * Need to deliver this PDU to the initiator now because after
+ * we update the state to CS_S11_COMPLETE the initiator will
+ * no longer be in an appropriate state.
+ */
+ event_ctx->iec_pdu_forwarded = B_TRUE;
+ pdu = (idm_pdu_t *)event_ctx->iec_info;
+ idm_pdu_rx_forward(ic, pdu);
+ /* FALLTHROUGH */
case CE_LOGOUT_SESSION_SUCCESS:
/* T13 */
@@ -1126,12 +1146,31 @@
ic->ic_conn_sm_status = IDM_STATUS_FAIL;
cv_signal(&ic->ic_state_cv);
mutex_exit(&ic->ic_state_mutex);
- ic->ic_transport_ops->it_ini_conn_disconnect(ic);
+ if (ic->ic_last_state != CS_S1_FREE &&
+ ic->ic_last_state != CS_S2_XPT_WAIT) {
+ ic->ic_transport_ops->it_ini_conn_disconnect(
+ ic);
+ } else {
+ (void) idm_notify_client(ic, CN_CONNECT_FAIL,
+ NULL);
+ }
}
/*FALLTHROUGH*/
case CS_S11_COMPLETE:
- /* No more traffic on this connection */
- (void) idm_notify_client(ic, CN_CONNECT_LOST, NULL);
+ /*
+ * No more traffic on this connection. If this is an
+ * initiator connection and we weren't connected yet
+ * then don't send the "connect lost" event.
+ * It's useful to the initiator to know whether we were
+ * logging in at the time so send that information in the
+ * data field.
+ */
+ if (IDM_CONN_ISTGT(ic) ||
+ ((ic->ic_last_state != CS_S1_FREE) &&
+ (ic->ic_last_state != CS_S2_XPT_WAIT))) {
+ (void) idm_notify_client(ic, CN_CONNECT_LOST,
+ (uintptr_t)(ic->ic_last_state == CS_S4_IN_LOGIN));
+ }
/* Abort all tasks */
idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
@@ -1403,6 +1442,9 @@
* for now lets just call the client's notify function and return
* the status.
*/
+ cn = (cn > CN_MAX) ? CN_MAX : cn;
+ IDM_SM_LOG(CE_NOTE, "idm_notify_client: ic=%p %s(%d)\n",
+ (void *)ic, idm_cn_strings[cn], cn);
return ((*ic->ic_conn_ops.icb_client_notify)(ic, cn, data));
}
--- a/usr/src/uts/common/io/idm/idm_impl.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/idm/idm_impl.c Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -90,7 +90,7 @@
break;
case ISCSI_OP_ASYNC_EVENT:
async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
- switch (async_evt->opcode) {
+ switch (async_evt->async_event) {
case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
idm_conn_rx_pdu_event(ic, CE_ASYNC_LOGOUT_RCV,
(uintptr_t)pdu);
@@ -330,6 +330,13 @@
idm_conn_t *ic;
idm_status_t rc;
+ mutex_enter(&is->is_mutex);
+ if (!is->is_online) {
+ mutex_exit(&is->is_mutex);
+ return (IDM_STATUS_FAIL);
+ }
+ mutex_exit(&is->is_mutex);
+
ic = idm_conn_create_common(CONN_TYPE_TGT, tt,
&is->is_svc_req.sr_conn_ops);
ic->ic_svc_binding = is;
@@ -350,7 +357,7 @@
idm.idm_tgt_conn_count++;
mutex_exit(&idm.idm_global_mutex);
- return (0);
+ return (IDM_STATUS_SUCCESS);
}
void
@@ -361,8 +368,6 @@
idm.idm_tgt_conn_count--;
mutex_exit(&idm.idm_global_mutex);
- idm_conn_sm_fini(ic);
-
if (ic->ic_transport_private != NULL) {
ic->ic_transport_ops->it_tgt_conn_destroy(ic);
}
@@ -415,6 +420,7 @@
void
idm_conn_destroy_common(idm_conn_t *ic)
{
+ idm_conn_sm_fini(ic);
idm_refcnt_destroy(&ic->ic_refcnt);
cv_destroy(&ic->ic_cv);
mutex_destroy(&ic->ic_mutex);
@@ -519,6 +525,25 @@
}
}
+void
+idm_transport_teardown()
+{
+ idm_transport_type_t type;
+ idm_transport_t *it;
+
+ ASSERT(mutex_owned(&idm.idm_global_mutex));
+
+ /* Caller holds the IDM global mutex */
+ for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
+ it = &idm_transport_list[type];
+ /* If we have an open LDI handle on this driver, close it */
+ if (it->it_ldi_hdl != NULL) {
+ (void) ldi_close(it->it_ldi_hdl, FNDELAY, kcred);
+ it->it_ldi_hdl = NULL;
+ }
+ }
+}
+
/*
* ID pool code. We use this to generate unique structure identifiers without
* searching the existing structures. This avoids the need to lock entire
@@ -933,6 +958,12 @@
list_destroy(&idt->idt_inbufv);
list_destroy(&idt->idt_outbufv);
+ /*
+ * The final call to idm_task_rele may happen with the task
+ * mutex held which may invoke this destructor immediately.
+ * Stall here until the task mutex owner lets go.
+ */
+ mutex_enter(&idt->idt_mutex);
mutex_destroy(&idt->idt_mutex);
}
--- a/usr/src/uts/common/io/idm/idm_so.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/idm/idm_so.c Tue Mar 24 17:50:49 2009 -0600
@@ -70,7 +70,10 @@
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
-static idm_status_t idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode,
+static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
+ idm_buf_t *idb, uint32_t offset, uint32_t length);
+static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
+static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
@@ -91,7 +94,7 @@
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
-static idm_status_t idm_so_notice_key_values(idm_conn_t *it,
+static void idm_so_notice_key_values(idm_conn_t *it,
nvlist_t *negotiated_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
idm_transport_caps_t *caps);
@@ -1128,6 +1131,12 @@
idm_buf_t *idb;
/*
+ * There is nothing to cleanup on initiator connections
+ */
+ if (IDM_CONN_ISINI(idt->idt_ic))
+ return (IDM_STATUS_SUCCESS);
+
+ /*
* If this is a target connection, call idm_buf_rx_from_ini_done for
* any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
*
@@ -1187,7 +1196,7 @@
* idm_so_notice_key_values() activates the negotiated key values for
* this connection.
*/
-static idm_status_t
+static void
idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
{
char *nvp_name;
@@ -1218,7 +1227,6 @@
break;
}
}
- return (IDM_STATUS_SUCCESS);
}
@@ -1332,7 +1340,6 @@
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
- idm_task_rele(idt);
/*
* PDUs in a sequence should be in continuously increasing
@@ -1340,11 +1347,15 @@
*/
if (offset != idb->idb_exp_offset) {
IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
+ idm_task_rele(idt);
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/* Expected next relative buffer offset */
idb->idb_exp_offset += n2h24(bhs->dlength);
+ idt->idt_rx_bytes += n2h24(bhs->dlength);
+
+ idm_task_rele(idt);
/*
* For now call scsi_rsp which will process the data rsp
@@ -1414,6 +1425,7 @@
}
/* Expected next relative offset */
idb->idb_exp_offset += ntoh24(bhs->dlength);
+ idt->idt_rx_bytes += n2h24(bhs->dlength);
/*
* Call the buffer callback when the transfer is complete
@@ -1470,6 +1482,7 @@
* sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
* PDU is associated with the R2T by the Target Transfer Tag (ttt).
*/
+
static void
idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
{
@@ -1477,13 +1490,14 @@
idm_buf_t *idb;
iscsi_rtt_hdr_t *rtt_hdr;
uint32_t data_offset;
+ uint32_t data_length;
ASSERT(ic != NULL);
ASSERT(pdu != NULL);
rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
data_offset = ntohl(rtt_hdr->data_offset);
-
+ data_length = ntohl(rtt_hdr->data_length);
idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
if (idt == NULL) {
@@ -1495,9 +1509,6 @@
/* Find the buffer bound to the task by the iSCSI initiator */
mutex_enter(&idt->idt_mutex);
idb = idm_buf_find(&idt->idt_outbufv, data_offset);
- idt->idt_r2t_ttt = rtt_hdr->ttt;
- /* reset to zero */
- idt->idt_exp_datasn = 0;
if (idb == NULL) {
mutex_exit(&idt->idt_mutex);
idm_task_rele(idt);
@@ -1506,8 +1517,22 @@
return;
}
- (void) idm_so_send_buf_region(idt, ISCSI_OP_SCSI_DATA, idb,
- data_offset, ntohl(rtt_hdr->data_length));
+ /*
+ * Verify that the requested range lies entirely inside the buffer.
+ * data_offset and data_length are both 32-bit values taken from the
+ * R2T header, so widen before adding; a 32-bit sum could wrap and
+ * let an oversized data_length slip past this check.
+ */
+ if ((uint64_t)data_offset + data_length > idb->idb_buflen) {
+ /* Requested range runs past the end of the buffer */
+ mutex_exit(&idt->idt_mutex);
+ idm_task_rele(idt);
+ IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
+ "buffer");
+ idm_pdu_rx_protocol_error(ic, pdu);
+ return;
+ }
+
+ idt->idt_r2t_ttt = rtt_hdr->ttt;
+ idt->idt_exp_datasn = 0;
+
+ /* Send exactly the length that was validated above */
+ idm_so_send_rtt_data(ic, idt, idb, data_offset, data_length);
mutex_exit(&idt->idt_mutex);
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
@@ -1552,6 +1577,8 @@
pdu->isp_iovlen++;
}
+ pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
+
if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
pdu->isp_iovlen, total_len) != 0) {
return (IDM_STATUS_IO);
@@ -1920,7 +1947,20 @@
mutex_enter(&idt->idt_mutex);
idb = idm_buf_find(&idt->idt_outbufv, 0);
mutex_exit(&idt->idt_mutex);
- idb->idb_xfer_len += pdu->isp_datalen;
+ /*
+ * If the initiator call to idm_buf_alloc
+ * failed then we can get to this point
+ * without a bound buffer. The associated
+ * connection failure will clean things up
+ * later. It would be nice to come up with
+ * a cleaner way to handle this. In
+ * particular it seems absurd to look up
+ * the task and the buffer just to update
+ * this counter.
+ */
+ if (idb)
+ idb->idb_xfer_len += pdu->isp_datalen;
+ idm_task_rele(idt);
}
}
@@ -2119,7 +2159,9 @@
static idm_status_t
idm_so_buf_setup(idm_buf_t *idb)
{
- /* nothing to do here */
+ /*
+ * Ensure bufalloc'd flag is unset. No other setup is performed
+ * here, and the function always returns IDM_STATUS_SUCCESS.
+ */
+ idb->idb_bufalloc = B_FALSE;
+
return (IDM_STATUS_SUCCESS);
}
@@ -2136,8 +2178,95 @@
kmem_free(idb->idb_buf, idb->idb_buflen);
}
-idm_status_t
-idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode, idm_buf_t *idb,
+static void
+idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
+ uint32_t offset, uint32_t length)
+{
+ idm_so_conn_t *so_conn = ic->ic_transport_private;
+ idm_pdu_t tmppdu;
+ idm_buf_t *rtt_buf;
+
+ ASSERT(mutex_owned(&idt->idt_mutex));
+
+ /*
+ * Queue "length" bytes of idb's data, starting "offset" bytes into
+ * idb->idb_buf, for transmission by the socket TX thread. Called
+ * with idt->idt_mutex held (asserted above). Nothing is returned;
+ * both failure paths below are handled here.
+ *
+ * Allocate a buffer to represent the RTT transfer. We could further
+ * optimize this by allocating the buffers internally from an rtt
+ * specific buffer cache since this is socket-specific code but for
+ * now we will keep it simple.
+ */
+ rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
+ if (rtt_buf == NULL) {
+ /*
+ * If we're in FFP then the failure was likely a resource
+ * allocation issue and we should close the connection by
+ * sending a CE_TRANSPORT_FAIL event.
+ *
+ * If we're not in FFP then idm_buf_alloc will always
+ * fail and the state is transitioning to "complete" anyway
+ * so we won't bother to send an event.
+ *
+ * ic_ffp is checked and the event is posted under
+ * ic_state_mutex.
+ */
+ mutex_enter(&ic->ic_state_mutex);
+ if (ic->ic_ffp)
+ idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
+ NULL, CT_NONE);
+ mutex_exit(&ic->ic_state_mutex);
+ return;
+ }
+
+ rtt_buf->idb_buf_cb = NULL;
+ rtt_buf->idb_cb_arg = NULL;
+ rtt_buf->idb_bufoffset = offset;
+ rtt_buf->idb_xfer_len = length;
+ rtt_buf->idb_ic = idt->idt_ic;
+ rtt_buf->idb_task_binding = idt;
+
+ /*
+ * Put the idm_buf_t on the tx queue. It will be transmitted by
+ * idm_sotx_thread. If the TX thread is not running, the new
+ * buffer is freed instead and nothing is queued; no task hold
+ * has been taken at that point.
+ */
+ mutex_enter(&so_conn->ic_tx_mutex);
+
+ if (!so_conn->ic_tx_thread_running) {
+ idm_buf_free(rtt_buf);
+ mutex_exit(&so_conn->ic_tx_mutex);
+ return;
+ }
+
+ /*
+ * This new buffer represents an additional reference on the task.
+ * The matching idm_task_rele() is in idm_so_send_rtt_data_done().
+ */
+ idm_task_hold(idt);
+
+ /*
+ * Build a template for the data PDU headers we will use so that
+ * the SN values will stay consistent with other PDU's we are
+ * transmitting like R2T and SCSI status.
+ *
+ * tmppdu is a stack PDU with only isp_hdr set; it exists solely
+ * to point icb_build_hdr at the buffer's header template.
+ */
+ bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
+ tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
+ (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
+ ISCSI_OP_SCSI_DATA);
+ rtt_buf->idb_tx_thread = B_TRUE;
+ rtt_buf->idb_in_transport = B_TRUE;
+ list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
+ cv_signal(&so_conn->ic_tx_cv);
+ mutex_exit(&so_conn->ic_tx_mutex);
+}
+
+static void
+idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
+{
+ /*
+ * Completion for a buffer queued by idm_so_send_rtt_data(): clear
+ * the in-transport flag, drop the task hold taken when the buffer
+ * was queued, and free the buffer.
+ *
+ * Don't worry about status -- we assume any error handling
+ * is performed by the caller (idm_sotx_thread).
+ *
+ * NOTE(review): the idm_sotx_thread call sites call
+ * mutex_exit(&idt->idt_mutex) after this returns, so the rele
+ * below is presumably never the last reference on the task --
+ * confirm.
+ */
+ idb->idb_in_transport = B_FALSE;
+ idm_task_rele(idt);
+ idm_buf_free(idb);
+}
+
+static idm_status_t
+idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
uint32_t buf_region_offset, uint32_t buf_region_length)
{
idm_conn_t *ic;
@@ -2146,6 +2275,7 @@
uint32_t data_offset = buf_region_offset;
iscsi_data_hdr_t *bhs;
idm_pdu_t *pdu;
+ idm_status_t tx_status;
ASSERT(mutex_owned(&idt->idt_mutex));
@@ -2173,30 +2303,13 @@
pdu->isp_ic = ic;
/*
- * For target we've already built a build a header template
+ * We've already built a header template
* to use during the transfer. Use this template so that
* the SN values stay consistent with any unrelated PDU's
* being transmitted.
*/
- if (opcode == ISCSI_OP_SCSI_DATA_RSP) {
- bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
- sizeof (iscsi_hdr_t));
- } else {
- /*
- * OK for now, but we should remove this bzero and
- * make sure the build_hdr function is initializing the
- * header properly
- */
- bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
-
- /*
- * setup iscsi data hdr
- * callback to the iSCSI layer to fill in the BHS
- * CmdSN, StatSN, ExpCmdSN, MaxCmdSN, TTT, ITT and
- * opcode
- */
- (*ic->ic_conn_ops.icb_build_hdr)(idt, pdu, opcode);
- }
+ bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
+ sizeof (iscsi_hdr_t));
/*
* Set DataSN, data offset, and flags in BHS
@@ -2231,9 +2344,13 @@
* Transmit the PDU. Call the internal routine directly
* as there is already implicit ordering.
*/
- (void) idm_i_so_tx(pdu);
+ if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
+ mutex_enter(&idt->idt_mutex);
+ return (tx_status);
+ }
mutex_enter(&idt->idt_mutex);
+ idt->idt_tx_bytes += chunk;
}
return (IDM_STATUS_SUCCESS);
@@ -2372,17 +2489,23 @@
mutex_enter(&idt->idt_mutex);
status = idm_so_send_buf_region(idt,
- ISCSI_OP_SCSI_DATA_RSP, idb, 0, idb->idb_xfer_len);
+ idb, 0, idb->idb_xfer_len);
/*
* TX thread owns the buffer so we expect it to
* be "in transport"
*/
ASSERT(idb->idb_in_transport);
- /*
- * idm_buf_tx_to_ini_done releases idt->idt_mutex
- */
- idm_buf_tx_to_ini_done(idt, idb, status);
+ if (IDM_CONN_ISTGT(ic)) {
+ /*
+ * idm_buf_tx_to_ini_done releases
+ * idt->idt_mutex
+ */
+ idm_buf_tx_to_ini_done(idt, idb, status);
+ } else {
+ idm_so_send_rtt_data_done(idt, idb);
+ mutex_exit(&idt->idt_mutex);
+ }
break;
}
@@ -2428,10 +2551,17 @@
* be "in transport"
*/
ASSERT(idb->idb_in_transport);
- /*
- * idm_buf_tx_to_ini_done releases idt->idt_mutex
- */
- idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
+ if (IDM_CONN_ISTGT(ic)) {
+ /*
+ * idm_buf_tx_to_ini_done releases
+ * idt->idt_mutex
+ */
+ idm_buf_tx_to_ini_done(idt, idb,
+ IDM_STATUS_ABORTED);
+ } else {
+ idm_so_send_rtt_data_done(idt, idb);
+ mutex_exit(&idt->idt_mutex);
+ }
mutex_enter(&so_conn->ic_tx_mutex);
break;
}
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c Tue Mar 24 17:50:49 2009 -0600
@@ -29,11 +29,10 @@
/*
* Framework interface routines for iSCSI
*/
-#include "iscsi.h" /* main header */
+
+#include "iscsi.h" /* main header */
+#include <sys/iscsi_protocol.h> /* protocol structs */
#include <sys/scsi/adapters/iscsi_if.h> /* ioctl interfaces */
-#include <sys/iscsi_protocol.h>
-/* protocol structs and defines */
-
#include "iscsi_targetparam.h"
#include "persistent.h"
#include <sys/scsi/adapters/iscsi_door.h>
@@ -60,6 +59,7 @@
int iscsi_nop_delay = ISCSI_DEFAULT_NOP_DELAY;
int iscsi_rx_window = ISCSI_DEFAULT_RX_WINDOW;
int iscsi_rx_max_window = ISCSI_DEFAULT_RX_MAX_WINDOW;
+boolean_t iscsi_logging = B_FALSE;
extern ib_boot_prop_t *iscsiboot_prop;
@@ -386,6 +386,10 @@
mutex_init(&ihp->hba_discovery_events_mutex, NULL,
MUTEX_DRIVER, NULL);
+ /* Get LDI ident */
+ rval = ldi_ident_from_dip(dip, &ihp->hba_li);
+ ASSERT(rval == 0); /* Failure indicates invalid arg */
+
/*
* init SendTargets semaphore that is used to allow
* only one operation at a time
@@ -686,6 +690,9 @@
ddi_remove_minor_node(dip, NULL);
ddi_prop_remove_all(ihp->hba_dip);
+
+ ldi_ident_release(ihp->hba_li);
+
mutex_destroy(&ihp->hba_discovery_events_mutex);
rw_destroy(&ihp->hba_sess_list_rwlock);
(void) iscsi_hba_kstat_term(ihp);
@@ -823,6 +830,9 @@
icmdp->cmd_misc_flags |= ISCSI_CMD_MISCFLAG_XARQ;
}
+
+ idm_sm_audit_init(&icmdp->cmd_state_audit);
+
mutex_init(&icmdp->cmd_mutex, NULL, MUTEX_DRIVER, NULL);
cv_init(&icmdp->cmd_completion, NULL, CV_DRIVER, NULL);
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h Tue Mar 24 17:50:49 2009 -0600
@@ -47,11 +47,13 @@
#include <sys/nvpair.h>
#include <sys/sdt.h>
+#include <sys/iscsi_protocol.h>
#include <sys/scsi/adapters/iscsi_if.h>
-#include <sys/iscsi_protocol.h>
#include <iscsiAuthClient.h>
#include <iscsi_stats.h>
#include <iscsi_thread.h>
+#include <sys/idm/idm.h>
+#include <sys/idm/idm_conn_sm.h>
#include <nvfile.h>
#ifndef MIN
@@ -68,12 +70,24 @@
#define LOGIN_PDU_BUFFER_SIZE (16 * 1024) /* move somewhere else */
+/*
+ * Debug logging switches. Each ISCSI_*_LOG macro is invoked with cmn_err()
+ * arguments, e.g. ISCSI_LOG(CE_NOTE, "..."), and calls cmn_err() only when
+ * the matching flag is set.
+ *
+ * The test is inverted and paired with an empty body so that the macro's
+ * own "if" already owns an "else". That keeps a caller's
+ * "if (x) ISCSI_LOG(...); else ..." from having its else captured by the
+ * macro.
+ */
+extern boolean_t iscsi_conn_logging;
+extern boolean_t iscsi_io_logging;
+extern boolean_t iscsi_login_logging;
+extern boolean_t iscsi_logging;
+extern boolean_t iscsi_sess_logging;
+#define ISCSI_CONN_LOG if (!iscsi_conn_logging) { } else cmn_err
+#define ISCSI_IO_LOG if (!iscsi_io_logging) { } else cmn_err
+#define ISCSI_LOGIN_LOG if (!iscsi_login_logging) { } else cmn_err
+#define ISCSI_LOG if (!iscsi_logging) { } else cmn_err
+#define ISCSI_SESS_LOG if (!iscsi_sess_logging) { } else cmn_err
+
/*
* Name Format of the different Task Queues
*/
#define ISCSI_SESS_IOTH_NAME_FORMAT "io_thrd_%d.%d"
#define ISCSI_SESS_WD_NAME_FORMAT "wd_thrd_%d.%d"
#define ISCSI_SESS_LOGIN_TASKQ_NAME_FORMAT "login_taskq_%d.%d"
+#define ISCSI_CONN_CN_TASKQ_NAME_FORMAT "conn_cn_taskq_%d.%d.%d"
#define ISCSI_CONN_RXTH_NAME_FORMAT "rx_thrd_%d.%d.%d"
#define ISCSI_CONN_TXTH_NAME_FORMAT "tx_thrd_%d.%d.%d"
@@ -175,7 +189,9 @@
/* session/connection needs to shutdown */
ISCSI_STATUS_SHUTDOWN,
/* logical unit in use */
- ISCSI_STATUS_BUSY
+ ISCSI_STATUS_BUSY,
+ /* Login on connection failed, retries exceeded */
+ ISCSI_STATUS_LOGIN_TIMED_OUT
} iscsi_status_t;
#define ISCSI_SUCCESS(status) (status == ISCSI_STATUS_SUCCESS)
@@ -283,7 +299,6 @@
*/
typedef enum iscsi_cmd_type {
ISCSI_CMD_TYPE_SCSI = 1, /* scsi cmd */
- ISCSI_CMD_TYPE_R2T, /* r2t */
ISCSI_CMD_TYPE_NOP, /* nop / ping */
ISCSI_CMD_TYPE_ABORT, /* abort */
ISCSI_CMD_TYPE_RESET, /* reset */
@@ -296,32 +311,64 @@
* iscsi_cmd_state - (reference iscsi_cmd.c for state diagram)
*/
typedef enum iscsi_cmd_state {
- ISCSI_CMD_STATE_FREE,
+ ISCSI_CMD_STATE_FREE = 0,
ISCSI_CMD_STATE_PENDING,
ISCSI_CMD_STATE_ACTIVE,
ISCSI_CMD_STATE_ABORTING,
- ISCSI_CMD_STATE_COMPLETED
+ ISCSI_CMD_STATE_IDM_ABORTING,
+ ISCSI_CMD_STATE_COMPLETED,
+ ISCSI_CMD_STATE_MAX
} iscsi_cmd_state_t;
+#ifdef ISCSI_CMD_SM_STRINGS
+static const char *iscsi_cmd_state_names[ISCSI_CMD_STATE_MAX+1] = {
+ "ISCSI_CMD_STATE_FREE",
+ "ISCSI_CMD_STATE_PENDING",
+ "ISCSI_CMD_STATE_ACTIVE",
+ "ISCSI_CMD_STATE_ABORTING",
+ "ISCSI_CMD_STATE_IDM_ABORTING",
+ "ISCSI_CMD_STATE_COMPLETED",
+ "ISCSI_CMD_STATE_MAX"
+};
+#endif
+
/*
* iscsi command events
+ *
+ * There is no E5 enumerator, so an enumerator's value is not the same
+ * as its E-number. ISCSI_CMD_EVENT_MAX is the last enumerator and sizes
+ * iscsi_cmd_event_names[], which (when ISCSI_CMD_SM_STRINGS is defined)
+ * lists one name per enumerator in declaration order; keep the two
+ * lists in step.
*/
typedef enum iscsi_cmd_event {
- ISCSI_CMD_EVENT_E1,
+ ISCSI_CMD_EVENT_E1 = 0,
ISCSI_CMD_EVENT_E2,
ISCSI_CMD_EVENT_E3,
ISCSI_CMD_EVENT_E4,
ISCSI_CMD_EVENT_E6,
ISCSI_CMD_EVENT_E7,
- ISCSI_CMD_EVENT_E8
+ ISCSI_CMD_EVENT_E8,
+ ISCSI_CMD_EVENT_E9,
+ ISCSI_CMD_EVENT_E10,
+ ISCSI_CMD_EVENT_MAX
} iscsi_cmd_event_t;
+#ifdef ISCSI_CMD_SM_STRINGS
+static const char *iscsi_cmd_event_names[ISCSI_CMD_EVENT_MAX+1] = {
+ "ISCSI_CMD_EVENT_E1",
+ "ISCSI_CMD_EVENT_E2",
+ "ISCSI_CMD_EVENT_E3",
+ "ISCSI_CMD_EVENT_E4",
+ "ISCSI_CMD_EVENT_E6",
+ "ISCSI_CMD_EVENT_E7",
+ "ISCSI_CMD_EVENT_E8",
+ "ISCSI_CMD_EVENT_E9",
+ "ISCSI_CMD_EVENT_E10",
+ "ISCSI_CMD_EVENT_MAX"
+};
+#endif
+
/*
* iscsi text command stages - these stages are used by iSCSI text
* processing to manage long resonses.
*/
typedef enum iscsi_cmd_text_stage {
- ISCSI_CMD_TEXT_INITIAL_REQ,
+ ISCSI_CMD_TEXT_INITIAL_REQ = 0,
ISCSI_CMD_TEXT_CONTINUATION,
ISCSI_CMD_TEXT_FINAL_RSP
} iscsi_cmd_text_stage_t;
@@ -349,12 +396,16 @@
clock_t cmd_lbolt_pending;
clock_t cmd_lbolt_active;
clock_t cmd_lbolt_aborting;
+ clock_t cmd_lbolt_idm_aborting;
clock_t cmd_lbolt_timeout;
uint8_t cmd_misc_flags;
+ idm_task_t *cmd_itp;
union {
/* ISCSI_CMD_TYPE_SCSI */
struct {
+ idm_buf_t *ibp_ibuf;
+ idm_buf_t *ibp_obuf;
struct scsi_pkt *pkt;
struct buf *bp;
int cmdlen;
@@ -443,6 +494,9 @@
kmutex_t cmd_mutex;
kcondvar_t cmd_completion;
+ idm_pdu_t cmd_pdu;
+
+ sm_audit_buf_t cmd_state_audit;
} iscsi_cmd_t;
@@ -480,40 +534,6 @@
#define ISCSI_LUN_CAP_RESET 0x01
/*
- * iscsi_conn_state - (reference iscsi_conn.c for state diagram)
- */
-typedef enum iscsi_conn_state {
- ISCSI_CONN_STATE_FREE,
- ISCSI_CONN_STATE_IN_LOGIN,
- ISCSI_CONN_STATE_LOGGED_IN,
- ISCSI_CONN_STATE_IN_LOGOUT,
- ISCSI_CONN_STATE_FAILED,
- ISCSI_CONN_STATE_POLLING
-} iscsi_conn_state_t;
-
-#define ISCSI_CONN_STATE_FULL_FEATURE(state) \
- ((state == ISCSI_CONN_STATE_LOGGED_IN) || \
- (state == ISCSI_CONN_STATE_IN_LOGOUT))
-
-/*
- * iscsi connection events - (reference iscsi_conn.c for state diagram)
- */
-typedef enum iscsi_conn_event {
- ISCSI_CONN_EVENT_T1,
- ISCSI_CONN_EVENT_T5,
- ISCSI_CONN_EVENT_T7,
- ISCSI_CONN_EVENT_T8,
- ISCSI_CONN_EVENT_T9,
- ISCSI_CONN_EVENT_T11,
- ISCSI_CONN_EVENT_T12,
- ISCSI_CONN_EVENT_T13,
- ISCSI_CONN_EVENT_T14,
- ISCSI_CONN_EVENT_T15,
- ISCSI_CONN_EVENT_T17,
- ISCSI_CONN_EVENT_T30
-} iscsi_conn_event_t;
-
-/*
*
*
*/
@@ -537,6 +557,61 @@
#define SIZEOF_SOCKADDR(so) ((so)->sa_family == AF_INET ? \
sizeof (struct sockaddr_in) : sizeof (struct sockaddr_in6))
+typedef enum {
+ LOGIN_START,
+ LOGIN_READY,
+ LOGIN_TX,
+ LOGIN_RX,
+ LOGIN_ERROR,
+ LOGIN_DONE,
+ LOGIN_FFP,
+ LOGIN_MAX
+} iscsi_login_state_t;
+
+#ifdef ISCSI_LOGIN_STATE_NAMES
+static const char *iscsi_login_state_names[LOGIN_MAX+1] = {
+ "LOGIN_START",
+ "LOGIN_READY",
+ "LOGIN_TX",
+ "LOGIN_RX",
+ "LOGIN_ERROR",
+ "LOGIN_DONE",
+ "LOGIN_FFP",
+ "LOGIN_MAX"
+};
+#endif
+
+/*
+ * iscsi_conn_state
+ *
+ * ISCSI_CONN_STATE_UNDEFINED is explicitly 0, so the named states start
+ * at 1. ISCSI_CONN_STATE_MAX is the last enumerator and sizes
+ * iscsi_ics_name[], which (when ISCSI_ICS_NAMES is defined) lists one
+ * name per enumerator in declaration order; keep the two lists in step.
+ */
+typedef enum iscsi_conn_state {
+ ISCSI_CONN_STATE_UNDEFINED = 0,
+ ISCSI_CONN_STATE_FREE,
+ ISCSI_CONN_STATE_IN_LOGIN,
+ ISCSI_CONN_STATE_LOGGED_IN,
+ ISCSI_CONN_STATE_IN_LOGOUT,
+ ISCSI_CONN_STATE_FAILED,
+ ISCSI_CONN_STATE_POLLING,
+ ISCSI_CONN_STATE_MAX
+} iscsi_conn_state_t;
+
+#ifdef ISCSI_ICS_NAMES
+static const char *iscsi_ics_name[ISCSI_CONN_STATE_MAX+1] = {
+ "ISCSI_CONN_STATE_UNDEFINED",
+ "ISCSI_CONN_STATE_FREE",
+ "ISCSI_CONN_STATE_IN_LOGIN",
+ "ISCSI_CONN_STATE_LOGGED_IN",
+ "ISCSI_CONN_STATE_IN_LOGOUT",
+ "ISCSI_CONN_STATE_FAILED",
+ "ISCSI_CONN_STATE_POLLING",
+ "ISCSI_CONN_STATE_MAX"
+};
+#endif
+
+#define ISCSI_CONN_STATE_FULL_FEATURE(state) \
+ ((state == ISCSI_CONN_STATE_LOGGED_IN) || \
+ (state == ISCSI_CONN_STATE_IN_LOGOUT))
+
/*
* iSCSI Connection Structure
*/
@@ -547,21 +622,23 @@
iscsi_conn_state_t conn_state; /* cur. conn. driver state */
iscsi_conn_state_t conn_prev_state; /* prev. conn. driver state */
- clock_t conn_state_lbolt;
/* protects the session state and synchronizes the state machine */
kmutex_t conn_state_mutex;
kcondvar_t conn_state_change;
boolean_t conn_state_destroy;
+ boolean_t conn_state_ffp;
+ boolean_t conn_state_idm_connected;
+ boolean_t conn_async_logout;
+ ddi_taskq_t *conn_cn_taskq;
- void *conn_socket; /* kernel socket */
+ idm_conn_t *conn_ic;
- /* base connection information */
+ /* base connection information, may have been redirected */
iscsi_sockaddr_t conn_base_addr;
/* current connection information, may have been redirected */
iscsi_sockaddr_t conn_curr_addr;
- /* current connection information, may have been redirected */
boolean_t conn_bound;
iscsi_sockaddr_t conn_bound_addr;
@@ -581,12 +658,12 @@
* the session's pending queue or aborted.
*/
iscsi_queue_t conn_queue_active;
+ iscsi_queue_t conn_queue_idm_aborting;
/* lbolt from the last receive, used for nop processing */
clock_t conn_rx_lbolt;
clock_t conn_nop_lbolt;
- iscsi_thread_t *conn_rx_thread;
iscsi_thread_t *conn_tx_thread;
/*
@@ -610,6 +687,19 @@
} stats;
/*
+ * These fields are used to coordinate the asynchronous IDM
+ * PDU operations with the synchronous login code.
+ */
+ kmutex_t conn_login_mutex;
+ kcondvar_t conn_login_cv;
+ iscsi_login_state_t conn_login_state;
+ iscsi_status_t conn_login_status;
+ iscsi_hdr_t conn_login_resp_hdr;
+ char *conn_login_data;
+ int conn_login_datalen;
+ int conn_login_max_data_length;
+
+ /*
* login min and max identify the amount of time
* in lbolt that iscsi_start_login() should attempt
* to log into a target portal. The login will
@@ -621,35 +711,60 @@
*/
clock_t conn_login_min;
clock_t conn_login_max;
+ sm_audit_buf_t conn_state_audit;
} iscsi_conn_t;
/*
- * iscsi_conn_state - (reference iscsi_sess.c for state diagram)
+ * iscsi_sess_state - (reference iscsi_sess.c for state diagram)
+ *
+ * ISCSI_SESS_STATE_MAX is the last enumerator and sizes
+ * iscsi_sess_state_names[], which (when ISCSI_SESS_SM_STRINGS is
+ * defined) lists one name per enumerator in declaration order; keep
+ * the two lists in step.
*/
typedef enum iscsi_sess_state {
- ISCSI_SESS_STATE_FREE,
+ ISCSI_SESS_STATE_FREE = 0,
ISCSI_SESS_STATE_LOGGED_IN,
ISCSI_SESS_STATE_FAILED,
ISCSI_SESS_STATE_IN_FLUSH,
- ISCSI_SESS_STATE_FLUSHED
+ ISCSI_SESS_STATE_FLUSHED,
+ ISCSI_SESS_STATE_MAX
} iscsi_sess_state_t;
+#ifdef ISCSI_SESS_SM_STRINGS
+static const char *iscsi_sess_state_names[ISCSI_SESS_STATE_MAX+1] = {
+ "ISCSI_SESS_STATE_FREE",
+ "ISCSI_SESS_STATE_LOGGED_IN",
+ "ISCSI_SESS_STATE_FAILED",
+ "ISCSI_SESS_STATE_IN_FLUSH",
+ "ISCSI_SESS_STATE_FLUSHED",
+ "ISCSI_SESS_STATE_MAX"
+};
+#endif
+
#define ISCSI_SESS_STATE_FULL_FEATURE(state) \
((state == ISCSI_SESS_STATE_LOGGED_IN) || \
(state == ISCSI_SESS_STATE_IN_FLUSH))
typedef enum iscsi_sess_event {
- ISCSI_SESS_EVENT_N1,
+ ISCSI_SESS_EVENT_N1 = 0,
ISCSI_SESS_EVENT_N3,
ISCSI_SESS_EVENT_N5,
ISCSI_SESS_EVENT_N6,
- ISCSI_SESS_EVENT_N7
+ ISCSI_SESS_EVENT_N7,
+ ISCSI_SESS_EVENT_MAX
} iscsi_sess_event_t;
+#ifdef ISCSI_SESS_SM_STRINGS
+static const char *iscsi_sess_event_names[ISCSI_SESS_EVENT_MAX+1] = {
+ "ISCSI_SESS_EVENT_N1",
+ "ISCSI_SESS_EVENT_N3",
+ "ISCSI_SESS_EVENT_N5",
+ "ISCSI_SESS_EVENT_N6",
+ "ISCSI_SESS_EVENT_N7",
+ "ISCSI_SESS_EVENT_MAX"
+};
+#endif
+
typedef enum iscsi_sess_type {
- ISCSI_SESS_TYPE_NORMAL,
+ ISCSI_SESS_TYPE_NORMAL = 0,
ISCSI_SESS_TYPE_DISCOVERY
} iscsi_sess_type_t;
@@ -841,6 +956,7 @@
iscsi_thread_t *sess_wd_thread;
+ sm_audit_buf_t sess_state_audit;
} iscsi_sess_t;
/*
@@ -853,6 +969,15 @@
} iscsi_sess_list_t;
/*
+ * iSCSI client notify task context for deferred IDM notification
+ * processing. Bundles an IDM connection (ct_ic), a client notification
+ * code (ct_icn) and a pointer-sized data value (ct_data).
+ *
+ * NOTE(review): presumably these mirror the arguments passed to
+ * iscsi_client_notify -- confirm against that callback.
+ */
+typedef struct iscsi_cn_task {
+ idm_conn_t *ct_ic;
+ idm_client_notify_t ct_icn;
+ uintptr_t ct_data;
+} iscsi_cn_task_t;
+
+/*
* iscsi_network
*/
typedef struct iscsi_network {
@@ -906,6 +1031,7 @@
uint32_t hba_sig;
dev_info_t *hba_dip; /* dev info ptr */
scsi_hba_tran_t *hba_tran; /* scsi tran ptr */
+ ldi_ident_t hba_li;
struct iscsi_sess *hba_sess_list; /* sess. list for hba */
krwlock_t hba_sess_list_rwlock; /* protect sess. list */
@@ -965,17 +1091,26 @@
* +--------------------------------------------------------------------+
*/
+/* IDM client callback entry points */
+idm_rx_pdu_cb_t iscsi_rx_scsi_rsp;
+idm_rx_pdu_cb_t iscsi_rx_misc_pdu;
+idm_rx_pdu_error_cb_t iscsi_rx_error_pdu;
+idm_build_hdr_cb_t iscsi_build_hdr;
+idm_task_cb_t iscsi_task_aborted;
+idm_client_notify_cb_t iscsi_client_notify;
+
/* iscsi_io.c */
int iscsi_sna_lte(uint32_t n1, uint32_t n2);
char *iscsi_get_next_text(char *data, int data_length, char *curr_text);
void iscsi_ic_thread(iscsi_thread_t *thread, void *arg);
void iscsi_tx_thread(iscsi_thread_t *thread, void *arg);
-void iscsi_rx_thread(iscsi_thread_t *thread, void *arg);
void iscsi_wd_thread(iscsi_thread_t *thread, void *arg);
iscsi_status_t iscsi_tx_cmd(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
+void iscsi_task_cleanup(int opcode, iscsi_cmd_t *icmdp);
+
void iscsi_handle_abort(void *arg);
iscsi_status_t iscsi_handle_reset(iscsi_sess_t *isp, int level,
iscsi_lun_t *ilp);
@@ -999,6 +1134,8 @@
void iscsi_dequeue_pending_cmd(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
void iscsi_enqueue_active_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp);
void iscsi_dequeue_active_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp);
+void iscsi_enqueue_idm_aborting_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp);
+void iscsi_dequeue_idm_aborting_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp);
void iscsi_enqueue_completed_cmd(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
iscsi_status_t iscsi_dequeue_cmd(iscsi_cmd_t **, iscsi_cmd_t **, iscsi_cmd_t *);
void iscsi_move_queue(iscsi_queue_t *src_queue, iscsi_queue_t *dst_queue);
@@ -1007,6 +1144,11 @@
/* iscsi_login.c */
iscsi_status_t iscsi_login_start(void *arg);
+void iscsi_login_update_state(iscsi_conn_t *icp,
+ iscsi_login_state_t next_state);
+void iscsi_login_update_state_locked(iscsi_conn_t *icp,
+ iscsi_login_state_t next_state);
+
/* iscsi_stats.c */
boolean_t iscsi_hba_kstat_init(struct iscsi_hba *ihp);
@@ -1019,6 +1161,7 @@
/* iscsi_net.c */
void iscsi_net_init();
void iscsi_net_fini();
+iscsi_status_t iscsi_net_interface();
/* iscsi_sess.c */
iscsi_sess_t *iscsi_sess_create(iscsi_hba_t *ihp,
@@ -1031,6 +1174,8 @@
void iscsi_sess_state_machine(iscsi_sess_t *isp, iscsi_sess_event_t event);
char *iscsi_sess_state_str(iscsi_sess_state_t state);
boolean_t iscsi_sess_set_auth(iscsi_sess_t *isp);
+iscsi_status_t iscsi_sess_reserve_scsi_itt(iscsi_cmd_t *icmdp);
+void iscsi_sess_release_scsi_itt(iscsi_cmd_t *icmdp);
iscsi_status_t iscsi_sess_reserve_itt(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
void iscsi_sess_release_itt(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
void iscsi_sess_redrive_io(iscsi_sess_t *isp);
@@ -1041,13 +1186,15 @@
/* iscsi_conn.c */
iscsi_status_t iscsi_conn_create(struct sockaddr *addr, iscsi_sess_t *isp,
iscsi_conn_t **icpp);
+iscsi_status_t iscsi_conn_online(iscsi_conn_t *icp);
iscsi_status_t iscsi_conn_offline(iscsi_conn_t *icp);
iscsi_status_t iscsi_conn_destroy(iscsi_conn_t *icp);
-iscsi_status_t iscsi_conn_state_machine(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-char *iscsi_conn_state_str(iscsi_conn_state_t state);
void iscsi_conn_set_login_min_max(iscsi_conn_t *icp, int min, int max);
iscsi_status_t iscsi_conn_sync_params(iscsi_conn_t *icp);
+void iscsi_conn_retry(iscsi_sess_t *isp, iscsi_conn_t *icp);
+void iscsi_conn_update_state(iscsi_conn_t *icp, iscsi_conn_state_t next_state);
+void iscsi_conn_update_state_locked(iscsi_conn_t *icp,
+ iscsi_conn_state_t next_state);
/* iscsi_lun.c */
iscsi_status_t iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num,
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* iSCSI command interfaces
@@ -36,6 +36,8 @@
iscsi_cmd_event_t event, void *arg);
static void iscsi_cmd_state_aborting(iscsi_cmd_t *icmdp,
iscsi_cmd_event_t event, void *arg);
+static void iscsi_cmd_state_idm_aborting(iscsi_cmd_t *icmdp,
+ iscsi_cmd_event_t event, void *arg);
static void iscsi_cmd_state_completed(iscsi_cmd_t *icmdp,
iscsi_cmd_event_t event, void *arg);
static char *iscsi_cmd_state_str(iscsi_cmd_state_t state);
@@ -87,6 +89,8 @@
* out or been requested to abort by an upper layer
* driver. At this point there is a task management
* command in the active queue trying to abort the task.
+ * C4': IDM ABORTING - SCSI command is owned by IDM and idm_task_abort
+ * has been called for this command.
* C5: COMPLETED - Command which is ready to complete via pkt callback.
*
* The state diagram is as follows:
@@ -100,39 +104,45 @@
* N+--------/ C2 \ |
* A| E4/6/7\ /-------- |
* L| ---+--- E4/6/7| |
- * | |E2 | |
+ * | |E2 E10 | |
* C| V | S |
* M| _______ | C |
* D+--------/ C3 \ | S |
* S E3/4/6/7\ /-------+ I |
- * ---+---E3/4/6/7| |
- * E4/6| | C |
- * V | M |
- * ------- | D |
- * - >/ C4 \ | S |
- * / \ /-------+ |
- * | ---+---E3/6/7 | |
- * | E4| V /E8
- * ------ -------
- * / C5 \
- * \ /
- * ---+---
+ * /---+---E3/4/6/7| |
+ * / | E9/10| |
+ * ------/ E4/6| | C |
+ * | V | M |
+ * E7| ------- | D |
+ * SCSI| - >/ C4 \ | S |
+ * | / \ /-------+ |
+ * | | ---+---E3/6/7/9| |
+ * | | E4| | V /E8
+ * | ------ | -------
+ * +-\ / / C5 \
+ * V \-------/ /---->\ /
+ * ------- E7 / ---+---
+ * / C4' \ /
+ * \ /------/ E9
+ * -------
*
* The state transition table is as follows:
*
- * +---------+---+---+-----+---------+
- * |C1 |C2 |C3 |C4 |C5 |
- * ---+---------+---+---+-----+---------+
- * C1| - |E1 | - | - | |
- * ---+---------+---+---+-----+---------+
- * C2|E4/6/7 |- |E2 | - |E4/6/7 |
- * ---+---------+---+---+-----+---------+
- * C3|E3/4/6/7 |- |- |E4/6 |E3/4/6/7 |
- * ---+---------+---+---+-----+---------+
- * C4| |- |- |E4 |E3/6/7 |
- * ---+---------+---+---+-----+---------+
- * C5|E8 | | | | |
- * ---+---------+---+---+-----+---------+
+ * +---------+---+---+-----+----+--------------+
+ * |C1 |C2 |C3 |C4 |C4' |C5 |
+ * ---+---------+---+---+-----+----+--------------+
+ * C1| - |E1 | - | - | - | |
+ * ---+---------+---+---+-----+----+--------------+
+ * C2|E4/6/7 |- |E2 | - | - |E4/6/7/10 |
+ * ---+---------+---+---+-----+----+--------------+
+ * C3|E3/4/6/7 |- |- |E4/6 |E7 |E3/4/6/7/9/10 |
+ * ---+---------+---+---+-----+----+--------------+
+ * C4| |- |- |E4 |E7 |E3/6/7/9 |
+ * ---+---------+---+---+-----+----+--------------+
+ * C4'| |- |- |- |- |E9 |
+ * ---+---------+---+---+-----+----+--------------+
+ * C5|E8 | | | | | |
+ * ---+---------+---+---+-----+----+--------------+
*
* Event definitions:
*
@@ -165,6 +175,8 @@
* -E8: Command has completed
* - Only SCSI cmds should receive these events
* and reach the command state.
+ * -E9: Callback received for previous idm_task_abort request
+ * -E10: The command this abort was associated with has terminated on its own
*/
void
iscsi_cmd_state_machine(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
@@ -179,6 +191,11 @@
char *, iscsi_cmd_event_str(event));
mutex_enter(&icmdp->cmd_mutex);
+
+ /* Audit event */
+ idm_sm_audit_event(&icmdp->cmd_state_audit,
+ SAS_ISCSI_CMD, icmdp->cmd_state, event, (uintptr_t)arg);
+
icmdp->cmd_prev_state = icmdp->cmd_state;
switch (icmdp->cmd_state) {
case ISCSI_CMD_STATE_FREE:
@@ -197,6 +214,10 @@
iscsi_cmd_state_aborting(icmdp, event, arg);
break;
+ case ISCSI_CMD_STATE_IDM_ABORTING:
+ iscsi_cmd_state_idm_aborting(icmdp, event, arg);
+ break;
+
case ISCSI_CMD_STATE_COMPLETED:
iscsi_cmd_state_completed(icmdp, event, arg);
@@ -213,6 +234,10 @@
}
if (release_lock == B_TRUE) {
+ /* Audit state if not completed */
+ idm_sm_audit_state_change(&icmdp->cmd_state_audit,
+ SAS_ISCSI_CMD, icmdp->cmd_prev_state, icmdp->cmd_state);
+
if (!(icmdp->cmd_misc_flags & ISCSI_CMD_MISCFLAG_FREE) ||
!(icmdp->cmd_misc_flags &
ISCSI_CMD_MISCFLAG_INTERNAL)) {
@@ -239,6 +264,7 @@
icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
icmdp->cmd_conn = icp;
icmdp->cmd_misc_flags |= ISCSI_CMD_MISCFLAG_INTERNAL;
+ idm_sm_audit_init(&icmdp->cmd_state_audit);
mutex_init(&icmdp->cmd_mutex, NULL, MUTEX_DRIVER, NULL);
cv_init(&icmdp->cmd_completion, NULL, CV_DRIVER, NULL);
}
@@ -258,10 +284,7 @@
ASSERT(icmdp->cmd_next == NULL);
ASSERT(icmdp->cmd_prev == NULL);
ASSERT(icmdp->cmd_misc_flags & ISCSI_CMD_MISCFLAG_INTERNAL);
-
- if (icmdp->cmd_type == ISCSI_CMD_TYPE_R2T)
- ASSERT(icmdp->cmd_un.r2t.icmdp == NULL);
- else if (icmdp->cmd_type == ISCSI_CMD_TYPE_ABORT)
+ if (icmdp->cmd_type == ISCSI_CMD_TYPE_ABORT)
ASSERT(icmdp->cmd_un.abort.icmdp == NULL);
else if (icmdp->cmd_type == ISCSI_CMD_TYPE_SCSI) {
ASSERT(icmdp->cmd_un.scsi.r2t_icmdp == NULL);
@@ -277,8 +300,6 @@
* | Internal Command Interfaces |
* +--------------------------------------------------------------------+
*/
-
-
/*
* iscsi_cmd_state_free -
*
@@ -302,7 +323,7 @@
if (icmdp->cmd_type == ISCSI_CMD_TYPE_SCSI) {
/*
* Establish absolute time when command should timeout.
- * For commands the depend on cmdsn window to go
+ * For commands that depend on cmdsn window to go
* active, the timeout will be ignored while on
* the pending queue and a new timeout will be
* established when the command goes active.
@@ -347,7 +368,6 @@
ASSERT(icmdp != NULL);
ASSERT(icmdp->cmd_state == ISCSI_CMD_STATE_PENDING);
ASSERT(isp != NULL);
- ASSERT(mutex_owned(&isp->sess_queue_pending.mutex));
/* switch on event change */
switch (event) {
@@ -355,6 +375,7 @@
case ISCSI_CMD_EVENT_E2:
/* A connection should have already been assigned */
+ ASSERT(mutex_owned(&isp->sess_queue_pending.mutex));
ASSERT(icmdp->cmd_conn != NULL);
/*
@@ -376,7 +397,7 @@
}
/* assign itt */
- status = iscsi_sess_reserve_itt(isp, icmdp);
+ status = iscsi_sess_reserve_scsi_itt(icmdp);
if (!ISCSI_SUCCESS(status)) {
/* no available itt slots */
mutex_exit(&isp->sess_cmdsn_mutex);
@@ -434,10 +455,6 @@
}
mutex_exit(&isp->sess_cmdsn_mutex);
break;
- case ISCSI_CMD_TYPE_R2T:
- /* no additional resources required */
- free_icmdp = B_TRUE;
- break;
case ISCSI_CMD_TYPE_NOP:
/* assign itt, if needed */
if (icmdp->cmd_itt == ISCSI_RSVD_TASK_TAG) {
@@ -519,7 +536,6 @@
/* remove command from pending queue */
iscsi_dequeue_pending_cmd(isp, icmdp);
-
/* check if expecting a response */
if (free_icmdp == B_FALSE) {
/* response expected, move to active queue */
@@ -556,8 +572,40 @@
}
break;
+ /* -E10: Abort is no longer required for this command */
+ case ISCSI_CMD_EVENT_E10:
+ /*
+ * Acquiring the sess_queue_pending lock while the
+ * conn_queue_active lock is held conflicts with the
+ * locking order in iscsi_cmd_state_pending where
+ * conn_queue_active is acquired while sess_queue_pending
+ * is held. Normally this would be a dangerous lock
+ * order conflict, except that we know that if we are
+ * seeing ISCSI_CMD_EVENT_E10 then the command being
+ * aborted is in "aborting" state and by extension
+ * is not in "pending" state. Therefore the code
+ * path with that alternate lock order will not execute.
+ * That's good because we can't drop the lock here without
+ * risking a deadlock.
+ */
+ ASSERT(mutex_owned(&icmdp->cmd_conn->conn_queue_active.mutex));
+ mutex_enter(&isp->sess_queue_pending.mutex);
+
+ icmdp->cmd_lbolt_aborting = ddi_get_lbolt();
+
+ iscsi_dequeue_pending_cmd(isp, icmdp);
+
+ icmdp->cmd_un.abort.icmdp->cmd_un.scsi.abort_icmdp = NULL;
+ icmdp->cmd_un.abort.icmdp = NULL;
+ icmdp->cmd_misc_flags |= ISCSI_CMD_MISCFLAG_FREE;
+ icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
+
+ mutex_exit(&isp->sess_queue_pending.mutex);
+ break;
+
/* -E4: Command has been requested to abort */
case ISCSI_CMD_EVENT_E4:
+ ASSERT(mutex_owned(&isp->sess_queue_pending.mutex));
icmdp->cmd_lbolt_aborting = ddi_get_lbolt();
ISCSI_CMD_SET_REASON_STAT(icmdp,
@@ -577,6 +625,7 @@
/* -E6: Command has timed out */
case ISCSI_CMD_EVENT_E6:
+ ASSERT(mutex_owned(&isp->sess_queue_pending.mutex));
iscsi_dequeue_pending_cmd(isp, icmdp);
switch (icmdp->cmd_type) {
@@ -592,25 +641,6 @@
iscsi_enqueue_completed_cmd(isp, icmdp);
break;
- case ISCSI_CMD_TYPE_R2T:
- mutex_enter(&icmdp->cmd_un.r2t.icmdp->cmd_mutex);
- icmdp->cmd_un.r2t.icmdp->
- cmd_un.scsi.r2t_icmdp = NULL;
- cv_broadcast(&icmdp->cmd_un.r2t.icmdp->
- cmd_completion);
- mutex_exit(&icmdp->cmd_un.r2t.icmdp->cmd_mutex);
- icmdp->cmd_un.r2t.icmdp = NULL;
-
- /*
- * If this command is timing out then
- * the SCSI command will be timing out
- * also. Just free the memory.
- */
- icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
- icmdp->cmd_misc_flags |=
- ISCSI_CMD_MISCFLAG_FREE;
- break;
-
case ISCSI_CMD_TYPE_NOP:
icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
/*
@@ -715,22 +745,11 @@
switch (icmdp->cmd_type) {
case ISCSI_CMD_TYPE_SCSI:
- iscsi_sess_release_itt(isp, icmdp);
+ iscsi_sess_release_scsi_itt(icmdp);
mutex_exit(&isp->sess_cmdsn_mutex);
iscsi_enqueue_completed_cmd(isp, icmdp);
break;
- case ISCSI_CMD_TYPE_R2T:
- icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
- mutex_exit(&isp->sess_cmdsn_mutex);
- /*
- * R2T commands do not have responses
- * so these command should never be
- * placed in the active queue.
- */
- ASSERT(FALSE);
- break;
-
case ISCSI_CMD_TYPE_NOP:
icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
iscsi_sess_release_itt(isp, icmdp);
@@ -756,20 +775,29 @@
t_icmdp = icmdp->cmd_un.abort.icmdp;
ASSERT(t_icmdp != NULL);
mutex_enter(&t_icmdp->cmd_mutex);
+ t_icmdp->cmd_un.scsi.abort_icmdp = NULL;
if (t_icmdp->cmd_state != ISCSI_CMD_STATE_COMPLETED) {
- mutex_enter(&isp->sess_cmdsn_mutex);
iscsi_dequeue_active_cmd(
t_icmdp->cmd_conn, t_icmdp);
- iscsi_sess_release_itt(isp, t_icmdp);
- mutex_exit(&isp->sess_cmdsn_mutex);
+ mutex_enter(
+ &icp->conn_queue_idm_aborting.mutex);
+ iscsi_enqueue_idm_aborting_cmd(
+ t_icmdp->cmd_conn,
+ t_icmdp);
+ mutex_exit(&icp->conn_queue_idm_aborting.mutex);
+ /*
+ * Complete abort processing after IDM
+ * calls us back. Set the status to use
+ * when we complete the command.
+ */
ISCSI_CMD_SET_REASON_STAT(
t_icmdp, CMD_TIMEOUT, STAT_TIMEOUT);
- iscsi_enqueue_completed_cmd(isp, t_icmdp);
+ idm_task_abort(icp->conn_ic, t_icmdp->cmd_itp,
+ AT_TASK_MGMT_ABORT);
+ } else {
+ cv_broadcast(&t_icmdp->cmd_completion);
}
-
- t_icmdp->cmd_un.scsi.abort_icmdp = NULL;
- cv_broadcast(&t_icmdp->cmd_completion);
mutex_exit(&t_icmdp->cmd_mutex);
icmdp->cmd_un.abort.icmdp = NULL;
@@ -821,7 +849,9 @@
ASSERT(!mutex_owned(&isp->sess_cmdsn_mutex));
break;
- /* -E4: Command has been requested to abort */
+ /* -E10,E4: Command has been requested to abort */
+ case ISCSI_CMD_EVENT_E10:
+ /* FALLTHRU */
case ISCSI_CMD_EVENT_E4:
/* E4 is only for resets and aborts */
@@ -838,15 +868,6 @@
iscsi_handle_abort(icmdp);
break;
- case ISCSI_CMD_TYPE_R2T:
- mutex_enter(&isp->sess_cmdsn_mutex);
- iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
- mutex_exit(&isp->sess_cmdsn_mutex);
-
- /* should never get in active queue */
- ASSERT(FALSE);
- break;
-
case ISCSI_CMD_TYPE_NOP:
icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
@@ -869,42 +890,60 @@
iscsi_sess_release_itt(isp, icmdp);
mutex_exit(&isp->sess_cmdsn_mutex);
+ /*
+ * If this is an E4 then we may need to deal with
+ * the abort's associated SCSI command. If this
+ * is an E10 then IDM is already cleaning up the
+ * SCSI command and all we need to do is break the
+ * linkage between them and free the abort command.
+ */
t_icmdp = icmdp->cmd_un.abort.icmdp;
ASSERT(t_icmdp != NULL);
- mutex_enter(&t_icmdp->cmd_mutex);
+ if (event != ISCSI_CMD_EVENT_E10) {
- /*
- * If abort command is aborted then we should
- * not act on the parent scsi command. If the
- * abort command timed out then we need to
- * complete the parent command if it still
- * exists with a timeout failure.
- */
- if ((event == ISCSI_CMD_EVENT_E6) &&
- (t_icmdp->cmd_state != ISCSI_CMD_STATE_COMPLETED)) {
+ mutex_enter(&t_icmdp->cmd_mutex);
+ t_icmdp->cmd_un.scsi.abort_icmdp = NULL;
+ /*
+ * If abort command is aborted then we should
+ * not act on the parent scsi command. If the
+ * abort command timed out then we need to
+ * complete the parent command if it still
+ * exists with a timeout failure.
+ */
+ if ((event == ISCSI_CMD_EVENT_E6) &&
+ (t_icmdp->cmd_state !=
+ ISCSI_CMD_STATE_IDM_ABORTING) &&
+ (t_icmdp->cmd_state !=
+ ISCSI_CMD_STATE_COMPLETED)) {
- mutex_enter(&isp->sess_cmdsn_mutex);
- iscsi_dequeue_active_cmd(
- t_icmdp->cmd_conn,
- t_icmdp);
- iscsi_sess_release_itt(isp,
- t_icmdp);
- mutex_exit(&isp->sess_cmdsn_mutex);
-
- ISCSI_CMD_SET_REASON_STAT(t_icmdp,
- CMD_TIMEOUT, STAT_TIMEOUT);
- iscsi_enqueue_completed_cmd(isp,
- t_icmdp);
+ iscsi_dequeue_active_cmd(
+ t_icmdp->cmd_conn, t_icmdp);
+ mutex_enter(&icp->
+ conn_queue_idm_aborting.mutex);
+ iscsi_enqueue_idm_aborting_cmd(
+ t_icmdp->cmd_conn, t_icmdp);
+ mutex_exit(&icp->
+ conn_queue_idm_aborting.mutex);
+ /*
+ * Complete abort processing after IDM
+ * calls us back. Set the status to use
+ * when we complete the command.
+ */
+ ISCSI_CMD_SET_REASON_STAT(t_icmdp,
+ CMD_TIMEOUT, STAT_TIMEOUT);
+ idm_task_abort(icp->conn_ic,
+ t_icmdp->cmd_itp,
+ AT_TASK_MGMT_ABORT);
+ } else {
+ cv_broadcast(&t_icmdp->cmd_completion);
+ }
+ mutex_exit(&t_icmdp->cmd_mutex);
+ } else {
+ t_icmdp->cmd_un.scsi.abort_icmdp = NULL;
}
-
- t_icmdp->cmd_un.scsi.abort_icmdp = NULL;
- cv_broadcast(&t_icmdp->cmd_completion);
- mutex_exit(&t_icmdp->cmd_mutex);
icmdp->cmd_un.abort.icmdp = NULL;
-
icmdp->cmd_misc_flags |=
ISCSI_CMD_MISCFLAG_FREE;
-
break;
case ISCSI_CMD_TYPE_RESET:
@@ -958,7 +997,6 @@
break;
default:
- mutex_enter(&isp->sess_cmdsn_mutex);
ASSERT(FALSE);
}
@@ -967,26 +1005,19 @@
/* -E7: Connection has encountered a problem */
case ISCSI_CMD_EVENT_E7:
-
mutex_enter(&isp->sess_cmdsn_mutex);
iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
switch (icmdp->cmd_type) {
case ISCSI_CMD_TYPE_SCSI:
- iscsi_sess_release_itt(isp, icmdp);
mutex_exit(&isp->sess_cmdsn_mutex);
+ mutex_enter(&icp->conn_queue_idm_aborting.mutex);
+ iscsi_enqueue_idm_aborting_cmd(icmdp->cmd_conn, icmdp);
+ mutex_exit(&icp->conn_queue_idm_aborting.mutex);
- /* notify caller of error */
- ISCSI_CMD_SET_REASON_STAT(icmdp,
- CMD_TRAN_ERR, 0);
- iscsi_enqueue_completed_cmd(isp, icmdp);
- break;
-
- case ISCSI_CMD_TYPE_R2T:
- icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
- mutex_exit(&isp->sess_cmdsn_mutex);
- /* should never get in active queue */
- ASSERT(FALSE);
+ ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, 0);
+ idm_task_abort(icp->conn_ic, icmdp->cmd_itp,
+ AT_TASK_MGMT_ABORT);
break;
case ISCSI_CMD_TYPE_NOP:
@@ -1073,6 +1104,17 @@
ASSERT(!mutex_owned(&isp->sess_cmdsn_mutex));
break;
+ /* -E9: IDM is no longer processing this command */
+ case ISCSI_CMD_EVENT_E9:
+ iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
+
+ iscsi_task_cleanup(ISCSI_OP_SCSI_RSP, icmdp);
+ iscsi_sess_release_scsi_itt(icmdp);
+
+ ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, 0);
+ iscsi_enqueue_completed_cmd(isp, icmdp);
+ break;
+
/* All other events are invalid for this state */
default:
ASSERT(FALSE);
@@ -1088,6 +1130,7 @@
iscsi_cmd_state_aborting(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
{
iscsi_sess_t *isp = (iscsi_sess_t *)arg;
+ iscsi_cmd_t *a_icmdp;
ASSERT(icmdp != NULL);
ASSERT(icmdp->cmd_type == ISCSI_CMD_TYPE_SCSI);
@@ -1104,7 +1147,7 @@
*/
mutex_enter(&isp->sess_cmdsn_mutex);
iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
- iscsi_sess_release_itt(isp, icmdp);
+ iscsi_sess_release_scsi_itt(icmdp);
mutex_exit(&isp->sess_cmdsn_mutex);
iscsi_enqueue_completed_cmd(isp, icmdp);
@@ -1132,15 +1175,113 @@
/* -E7: Connection has encountered a problem */
case ISCSI_CMD_EVENT_E7:
- mutex_enter(&isp->sess_cmdsn_mutex);
iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
- iscsi_sess_release_itt(isp, icmdp);
- mutex_exit(&isp->sess_cmdsn_mutex);
+ mutex_enter(&icmdp->cmd_conn->conn_queue_idm_aborting.mutex);
+ iscsi_enqueue_idm_aborting_cmd(icmdp->cmd_conn, icmdp);
+ mutex_exit(&icmdp->cmd_conn->conn_queue_idm_aborting.mutex);
+
+ /*
+ * Since we are in "aborting" state there is another command
+ * representing the abort of this command. This command
+ * will cleanup at some indeterminate time after the call
+ * to idm_task_abort so we can't leave the abort request
+ * active. An E10 event to the abort command will cause
+ * it to complete immediately.
+ */
+ if ((a_icmdp = icmdp->cmd_un.scsi.abort_icmdp) != NULL) {
+ iscsi_cmd_state_machine(a_icmdp,
+ ISCSI_CMD_EVENT_E10, arg);
+ }
+
+ ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, 0);
+ idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp,
+ AT_TASK_MGMT_ABORT);
+ break;
+
+ /* -E9: IDM is no longer processing this command */
+ case ISCSI_CMD_EVENT_E9:
+ iscsi_dequeue_active_cmd(icmdp->cmd_conn, icmdp);
+
+ iscsi_task_cleanup(ISCSI_OP_SCSI_RSP, icmdp);
+ iscsi_sess_release_scsi_itt(icmdp);
+
+ ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, 0);
+ iscsi_enqueue_completed_cmd(isp, icmdp);
+ break;
+
+ /* All other events are invalid for this state */
+ default:
+ ASSERT(FALSE);
+ }
+}
+
+static void
+iscsi_cmd_state_idm_aborting(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event,
+ void *arg)
+{
+ iscsi_sess_t *isp = (iscsi_sess_t *)arg;
+
+ ASSERT(icmdp != NULL);
+ ASSERT(icmdp->cmd_type == ISCSI_CMD_TYPE_SCSI);
+ ASSERT(icmdp->cmd_state == ISCSI_CMD_STATE_IDM_ABORTING);
+ ASSERT(isp != NULL);
- /* complete io with error */
- ISCSI_CMD_SET_REASON_STAT(icmdp,
- CMD_TRAN_ERR, 0);
+ /* switch on event change */
+ switch (event) {
+ /* -E3: Command was successfully completed */
+ case ISCSI_CMD_EVENT_E3:
+ /*
+ * iscsi_rx_process_cmd_rsp() and iscsi_rx_process_data_rsp()
+ * are supposed to confirm the cmd state is appropriate before
+ * generating an E3 event. E3 is not allowed in this state.
+ */
+ ASSERT(0);
+ break;
+
+ /* -E4: Command has been requested to abort */
+ case ISCSI_CMD_EVENT_E4:
+ /*
+ * An upper level driver might attempt to
+ * abort a command that we are already
+ * aborting due to a nop. Since we are
+ * already in the process of aborting
+ * ignore the request.
+ */
+ break;
+
+ /* -E6: Command has timed out */
+ case ISCSI_CMD_EVENT_E6:
+ ASSERT(FALSE);
+ /*
+ * Timeouts should not occur on aborting commands
+ */
+ break;
+
+ /* -E7: Connection has encountered a problem */
+ case ISCSI_CMD_EVENT_E7:
+ /*
+ * We have already requested IDM to stop processing this
+ * command so ignore this request.
+ */
+ break;
+
+ /* -E9: IDM is no longer processing this command */
+ case ISCSI_CMD_EVENT_E9:
+ mutex_enter(&icmdp->cmd_conn->conn_queue_idm_aborting.mutex);
+ iscsi_dequeue_idm_aborting_cmd(icmdp->cmd_conn, icmdp);
+ mutex_exit(&icmdp->cmd_conn->conn_queue_idm_aborting.mutex);
+
+ /* This is always an error so make sure an error has been set */
+ ASSERT(icmdp->cmd_un.scsi.pkt->pkt_reason != CMD_CMPLT);
+ iscsi_task_cleanup(ISCSI_OP_SCSI_RSP, icmdp);
+ iscsi_sess_release_scsi_itt(icmdp);
+
+ /*
+ * Whoever called idm_task_abort should have set the completion
+ * status beforehand.
+ */
iscsi_enqueue_completed_cmd(isp, icmdp);
+ cv_broadcast(&icmdp->cmd_completion);
break;
/* All other events are invalid for this state */
@@ -1200,6 +1341,8 @@
return ("active");
case ISCSI_CMD_STATE_ABORTING:
return ("aborting");
+ case ISCSI_CMD_STATE_IDM_ABORTING:
+ return ("idm-aborting");
case ISCSI_CMD_STATE_COMPLETED:
return ("completed");
default:
@@ -1230,6 +1373,10 @@
return ("E7");
case ISCSI_CMD_EVENT_E8:
return ("E8");
+ case ISCSI_CMD_EVENT_E9:
+ return ("E9");
+ case ISCSI_CMD_EVENT_E10:
+ return ("E10");
default:
return ("unknown");
}
@@ -1246,8 +1393,6 @@
switch (type) {
case ISCSI_CMD_TYPE_SCSI:
return ("scsi");
- case ISCSI_CMD_TYPE_R2T:
- return ("r2t");
case ISCSI_CMD_TYPE_NOP:
return ("nop");
case ISCSI_CMD_TYPE_ABORT:
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,42 +19,29 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* iSCSI connection interfaces
*/
+#define ISCSI_ICS_NAMES
#include "iscsi.h"
#include "persistent.h"
#include <sys/bootprops.h>
extern ib_boot_prop_t *iscsiboot_prop;
-/* interface connection interfaces */
-static iscsi_status_t iscsi_conn_state_free(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static void iscsi_conn_state_in_login(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static void iscsi_conn_state_logged_in(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static void iscsi_conn_state_in_logout(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static void iscsi_conn_state_failed(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static void iscsi_conn_state_polling(iscsi_conn_t *icp,
- iscsi_conn_event_t event);
-static char *iscsi_conn_event_str(iscsi_conn_event_t event);
+static void iscsi_client_notify_task(void *cn_task_void);
+
static void iscsi_conn_flush_active_cmds(iscsi_conn_t *icp);
-static void iscsi_conn_logged_in(iscsi_sess_t *isp,
- iscsi_conn_t *icp);
-static void iscsi_conn_retry(iscsi_sess_t *isp,
- iscsi_conn_t *icp);
-
#define SHUTDOWN_TIMEOUT 180 /* seconds */
extern int modrootloaded;
+
+boolean_t iscsi_conn_logging = B_FALSE;
+
/*
* +--------------------------------------------------------------------+
* | External Connection Interfaces |
@@ -105,16 +92,22 @@
icp->conn_state = ISCSI_CONN_STATE_FREE;
mutex_init(&icp->conn_state_mutex, NULL, MUTEX_DRIVER, NULL);
cv_init(&icp->conn_state_change, NULL, CV_DRIVER, NULL);
+ mutex_init(&icp->conn_login_mutex, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&icp->conn_login_cv, NULL, CV_DRIVER, NULL);
icp->conn_state_destroy = B_FALSE;
+ idm_sm_audit_init(&icp->conn_state_audit);
icp->conn_sess = isp;
- icp->conn_state_lbolt = ddi_get_lbolt();
mutex_enter(&iscsi_oid_mutex);
icp->conn_oid = iscsi_oid++;
mutex_exit(&iscsi_oid_mutex);
- /* Creation of the receive thread */
- if (snprintf(th_name, sizeof (th_name) - 1, ISCSI_CONN_RXTH_NAME_FORMAT,
+ /*
+ * IDM CN taskq
+ */
+
+ if (snprintf(th_name, sizeof (th_name) - 1,
+ ISCSI_CONN_CN_TASKQ_NAME_FORMAT,
icp->conn_sess->sess_hba->hba_oid, icp->conn_sess->sess_oid,
icp->conn_oid) >= sizeof (th_name)) {
cv_destroy(&icp->conn_state_change);
@@ -124,17 +117,25 @@
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- icp->conn_rx_thread = iscsi_thread_create(isp->sess_hba->hba_dip,
- th_name, iscsi_rx_thread, icp);
+ icp->conn_cn_taskq =
+ ddi_taskq_create(icp->conn_sess->sess_hba->hba_dip, th_name, 1,
+ TASKQ_DEFAULTPRI, 0);
+ if (icp->conn_cn_taskq == NULL) {
+ cv_destroy(&icp->conn_state_change);
+ mutex_destroy(&icp->conn_state_mutex);
+ kmem_free(icp, sizeof (iscsi_conn_t));
+ *icpp = NULL;
+ return (ISCSI_STATUS_INTERNAL_ERROR);
+ }
/* Creation of the transfer thread */
if (snprintf(th_name, sizeof (th_name) - 1, ISCSI_CONN_TXTH_NAME_FORMAT,
icp->conn_sess->sess_hba->hba_oid, icp->conn_sess->sess_oid,
icp->conn_oid) >= sizeof (th_name)) {
- iscsi_thread_destroy(icp->conn_rx_thread);
+ /* Tear down the CN taskq while icp is still valid (before the free). */
+ ddi_taskq_destroy(icp->conn_cn_taskq);
cv_destroy(&icp->conn_state_change);
mutex_destroy(&icp->conn_state_mutex);
kmem_free(icp, sizeof (iscsi_conn_t));
*icpp = NULL;
return (ISCSI_STATUS_INTERNAL_ERROR);
}
@@ -144,6 +145,7 @@
/* setup connection queues */
iscsi_init_queue(&icp->conn_queue_active);
+ iscsi_init_queue(&icp->conn_queue_idm_aborting);
bcopy(addr, &icp->conn_base_addr, sizeof (icp->conn_base_addr));
@@ -164,6 +166,47 @@
return (ISCSI_STATUS_SUCCESS);
}
+/*
+ * iscsi_conn_online - This attempts to take a connection from
+ * ISCSI_CONN_STATE_FREE to ISCSI_CONN_STATE_LOGGED_IN.
+ *
+ * Locking: the caller must hold icp->conn_state_mutex and the connection
+ * must be in ISCSI_CONN_STATE_FREE (both are asserted). The mutex is
+ * dropped while the login runs and is held again when this returns, so
+ * callers should not assume conn_state is unchanged across the call.
+ *
+ * The iscsi_task_t handed to iscsi_login_start() is a temporary that is
+ * freed as soon as that call returns. t_blocking is set to B_TRUE,
+ * presumably to make the login synchronous - TODO confirm against
+ * iscsi_login_start().
+ *
+ * Returns whatever iscsi_login_start() returns for this connection.
+ */
+iscsi_status_t
+iscsi_conn_online(iscsi_conn_t *icp)
+{
+ iscsi_task_t *itp;
+ iscsi_status_t rval;
+
+ ASSERT(icp != NULL);
+ ASSERT(mutex_owned(&icp->conn_state_mutex));
+ ASSERT(icp->conn_state == ISCSI_CONN_STATE_FREE);
+
+ /*
+ * If we are attempting to connect then for the purposes of the
+ * other initiator code we are effectively in ISCSI_CONN_STATE_IN_LOGIN.
+ * The state mutex is released right after the update so that it is
+ * not held across the login below.
+ */
+ iscsi_conn_update_state_locked(icp, ISCSI_CONN_STATE_IN_LOGIN);
+ mutex_exit(&icp->conn_state_mutex);
+
+ /*
+ * Sync base connection information before login
+ * A login redirection might have shifted the
+ * current information from the base.
+ */
+ bcopy(&icp->conn_base_addr, &icp->conn_curr_addr,
+ sizeof (icp->conn_curr_addr));
+
+ itp = kmem_zalloc(sizeof (iscsi_task_t), KM_SLEEP);
+ ASSERT(itp != NULL);
+
+ itp->t_arg = icp;
+ itp->t_blocking = B_TRUE;
+ rval = iscsi_login_start(itp);
+ kmem_free(itp, sizeof (iscsi_task_t));
+
+ mutex_enter(&icp->conn_state_mutex);
+
+ return (rval);
+}
/*
* iscsi_conn_offline - This attempts to take a connection from
@@ -184,34 +227,45 @@
* on the connection to influence the transitions
* to quickly complete. Then wait for a state
* transition.
+ *
+ * ISCSI_CONN_STATE_LOGGED_IN is set immediately at the
+ * start of CN_NOTIFY_FFP processing. icp->conn_state_ffp
+ * is set to true at the end of ffp processing, at which
+ * point any session updates are complete. We don't
+ * want to start offlining the connection before we're
+ * done completing the FFP processing since this might
+ * interrupt the discovery process.
*/
delay = ddi_get_lbolt() + SEC_TO_TICK(SHUTDOWN_TIMEOUT);
mutex_enter(&icp->conn_state_mutex);
icp->conn_state_destroy = B_TRUE;
- while ((icp->conn_state != ISCSI_CONN_STATE_FREE) &&
- (icp->conn_state != ISCSI_CONN_STATE_LOGGED_IN) &&
+ while ((((icp->conn_state != ISCSI_CONN_STATE_FREE) &&
+ (icp->conn_state != ISCSI_CONN_STATE_LOGGED_IN)) ||
+ ((icp->conn_state == ISCSI_CONN_STATE_LOGGED_IN) &&
+ !icp->conn_state_ffp)) &&
(ddi_get_lbolt() < delay)) {
/* wait for transition */
(void) cv_timedwait(&icp->conn_state_change,
&icp->conn_state_mutex, delay);
}
- /* Final check whether we can destroy the connection */
switch (icp->conn_state) {
case ISCSI_CONN_STATE_FREE:
- /* Easy case - Connection is dead */
break;
case ISCSI_CONN_STATE_LOGGED_IN:
- /* Hard case - Force connection logout */
- (void) iscsi_conn_state_machine(icp,
- ISCSI_CONN_EVENT_T9);
+ if (icp->conn_state_ffp)
+ (void) iscsi_handle_logout(icp);
+ else {
+ icp->conn_state_destroy = B_FALSE;
+ mutex_exit(&icp->conn_state_mutex);
+ return (ISCSI_STATUS_INTERNAL_ERROR);
+ }
break;
case ISCSI_CONN_STATE_IN_LOGIN:
case ISCSI_CONN_STATE_IN_LOGOUT:
case ISCSI_CONN_STATE_FAILED:
case ISCSI_CONN_STATE_POLLING:
default:
- /* All other cases fail the destroy */
icp->conn_state_destroy = B_FALSE;
mutex_exit(&icp->conn_state_mutex);
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -241,15 +295,16 @@
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- /* Destroy receive thread */
- iscsi_thread_destroy(icp->conn_rx_thread);
-
/* Destroy transfer thread */
iscsi_thread_destroy(icp->conn_tx_thread);
+ ddi_taskq_destroy(icp->conn_cn_taskq);
/* Terminate connection queues */
+ iscsi_destroy_queue(&icp->conn_queue_idm_aborting);
iscsi_destroy_queue(&icp->conn_queue_active);
+ cv_destroy(&icp->conn_login_cv);
+ mutex_destroy(&icp->conn_login_mutex);
cv_destroy(&icp->conn_state_change);
mutex_destroy(&icp->conn_state_mutex);
@@ -317,186 +372,255 @@
}
+/*
+ * Process the idm notifications
+ */
+idm_status_t
+iscsi_client_notify(idm_conn_t *ic, idm_client_notify_t icn, uintptr_t data)
+{
+ iscsi_cn_task_t *cn;
+ iscsi_conn_t *icp = ic->ic_handle;
+ iscsi_sess_t *isp;
-/*
- * iscsi_conn_state_machine - This function is used to drive the
- * state machine of the iscsi connection. It takes in a connection
- * and the associated event effecting the connection.
- *
- * 7.1.3 Connection State Diagram for an Initiator
- * Symbolic Names for States:
- * S1: FREE - State on instantiation, or after successful
- * connection closure.
- * S2: IN_LOGIN - Waiting for login process to conclude,
- * possibly involving several PDU exchanges.
- * S3: LOGGED_IN - In Full Feature Phase, waiting for all internal,
- * iSCSI, and transport events
- * S4: IN_LOGOUT - Waiting for the Logout repsonse.
- * S5: FAILED - The connection has failed. Attempting
- * to reconnect.
- * S6: POLLING - The connection reconnect attempts have
- * failed. Continue to poll at a lower
- * frequency.
- *
- * States S3, S4 constitute the Full Feature Phase
- * of the connection.
- *
- * The state diagram is as follows:
- * -------
- * +-------->/ S1 \<------------------------------+
- * | +->\ /<---+ /---\ |
- * | / ---+--- |T7/30 T7| | |
- * | + | | \->------ |
- * | T8| |T1 / T5 / S6 \--->|
- * | | | / +----------\ /T30 |
- * | | V / / ------ |
- * | | ------- / / ^ |
- * | | / S2 \ / T5 |T7 |
- * | | \ / +-------------- --+--- |
- * | | ---+--- / / S5 \--->|
- * | | | / T14/T15 \ /T30 |
- * | | |T5 / +-------------> ------ |
- * | | | / / |
- * | | | / / T11 |
- * | | | / / +----+ |
- * | | V V / | | |
- * | | ------+ ----+-- | |
- * | +-----/ S3 \T9/11/ S4 \<+ |
- * +----------\ /---->\ /----------------+
- * ------- ------- T15/T17
- *
- * The state transition table is as follows:
- *
- * +-----+---+---+------+------+---+
- * |S1 |S2 |S3 |S4 |S5 |S6 |
- * ---+-----+---+---+------+------+---+
- * S1|T1 |T1 | - | - | - | |
- * ---+-----+---+---+------+------+---+
- * S2|T7/30|- |T5 | - | - | |
- * ---+-----+---+---+------+------+---+
- * S3|T8 |- | - |T9/11 |T14/15| |
- * ---+-----+---+---+------+------+---+
- * S4| |- | - |T11 |T15/17| |
- * ---+-----+---+---+------+------+---+
- * S5|T30 | |T5 | | |T7 |
- * ---+-----+---+---+------+------+---+
- * S6|T30 | |T5 | | |T7 |
- * ---+-----+---+---+------+------+---+
- *
- * Events definitions:
- *
- * -T1: Transport connection request was made (e.g., TCP SYN sent).
- * -T5: The final iSCSI Login response with a Status-Class of zero was
- * received.
- * -T7: One of the following events caused the transition:
- * - Login timed out.
- * - A transport disconnect indication was received.
- * - A transport reset was received.
- * - An internal event indicating a transport timeout was
- * received.
- * - An internal event of receiving a Logout repsonse (success)
- * on another connection for a "close the session" Logout
- * request was received.
- * * In all these cases, the transport connection is closed.
- * -T8: An internal event of receiving a Logout response (success)
- * on another connection for a "close the session" Logout request
- * was received, thus closing this connection requiring no further
- * cleanup.
- * -T9: An internal event that indicates the readiness to start the
- * Logout process was received, thus prompting an iSCSI Logout to
- * be sent by the initiator.
- * -T11: Async PDU with AsyncEvent "Request Logout" was received.
- * -T13: An iSCSI Logout response (success) was received, or an internal
- * event of receiving a Logout response (success) on another
- * connection was received.
- * -T14: One or more of the following events case this transition:
- * - Header Digest Error
- * - Protocol Error
- * -T15: One or more of the following events caused this transition:
- * - Internal event that indicates a transport connection timeout
- * was received thus prompting transport RESET or transport
- * connection closure.
- * - A transport RESET
- * - A transport disconnect indication.
- * - Async PDU with AsyncEvent "Drop connection" (for this CID)
- * - Async PDU with AsyncEvent "Drop all connections"
- * -T17: One or more of the following events caused this transition:
- * - Logout response, (failure i.e., a non-zero status) was
- * received, or Logout timed out.
- * - Any of the events specified for T15.
- * -T30: One of the following event caused the transition:
- * - Thefinal iSCSI Login response was received with a non-zero
- * Status-Class.
- */
-iscsi_status_t
-iscsi_conn_state_machine(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_status_t status = ISCSI_STATUS_SUCCESS;
+ /*
+ * Don't access icp if the notification is CN_CONNECT_DESTROY
+ * since icp may have already been freed.
+ *
+ * Handle CN_CONNECT_FAIL, CN_LOGIN_FAIL, CN_READY_FOR_LOGIN and
+ * CN_CONNECT_DESTROY immediately; everything else goes to the taskq.
+ */
+ switch (icn) {
+ case CN_CONNECT_FAIL:
+ case CN_LOGIN_FAIL:
+ /*
+ * Wakeup any thread waiting for login stuff to happen.
+ */
+ ASSERT(icp != NULL);
+ iscsi_login_update_state(icp, LOGIN_ERROR);
+ return (IDM_STATUS_SUCCESS);
+ case CN_READY_FOR_LOGIN:
+ idm_conn_hold(ic); /* Released in CN_CONNECT_LOST */
+ mutex_enter(&icp->conn_state_mutex);
+ icp->conn_state_idm_connected = B_TRUE;
+ cv_broadcast(&icp->conn_state_change);
+ mutex_exit(&icp->conn_state_mutex);
+
+ iscsi_login_update_state(icp, LOGIN_READY);
+ return (IDM_STATUS_SUCCESS);
+ case CN_CONNECT_DESTROY:
+ /*
+ * We released any dependencies we had on this object in
+ * either CN_LOGIN_FAIL or CN_CONNECT_LOST so we just need
+ * to destroy the IDM connection now.
+ */
+ idm_ini_conn_destroy(ic);
+ return (IDM_STATUS_SUCCESS);
+ }
ASSERT(icp != NULL);
- ASSERT(mutex_owned(&icp->conn_state_mutex));
+ isp = icp->conn_sess;
+
+ /*
+ * Dispatch notifications to the taskq since they often require
+ * long blocking operations. In the case of CN_CONNECT_DESTROY
+ * we actually just want to destroy the connection which we
+ * can't do in the IDM taskq context.
+ */
+ cn = kmem_alloc(sizeof (*cn), KM_SLEEP);
+
+ cn->ct_ic = ic;
+ cn->ct_icn = icn;
+ cn->ct_data = data;
- DTRACE_PROBE3(event, iscsi_conn_t *, icp,
- char *, iscsi_conn_state_str(icp->conn_state),
- char *, iscsi_conn_event_str(event));
+ idm_conn_hold(ic);
+
+ if (ddi_taskq_dispatch(icp->conn_cn_taskq,
+ iscsi_client_notify_task, cn, DDI_SLEEP) != DDI_SUCCESS) {
+ /*
+ * The notify task will never run, so nothing else will free
+ * cn or drop the hold taken above; undo both here.
+ */
+ kmem_free(cn, sizeof (*cn));
+ idm_conn_rele(ic);
+ cmn_err(CE_WARN, "iscsi connection(%u) failure - "
+ "unable to schedule notify task", icp->conn_oid);
+ iscsi_conn_update_state(icp, ISCSI_CONN_STATE_FREE);
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp,
+ ISCSI_SESS_EVENT_N6);
+ mutex_exit(&isp->sess_state_mutex);
+ }
+
+ return (IDM_STATUS_SUCCESS);
+}
- icp->conn_prev_state = icp->conn_state;
- icp->conn_state_lbolt = ddi_get_lbolt();
+static void
+iscsi_client_notify_task(void *cn_task_void)
+{
+ iscsi_cn_task_t *cn_task = cn_task_void;
+ iscsi_conn_t *icp;
+ iscsi_sess_t *isp;
+ idm_conn_t *ic;
+ idm_client_notify_t icn;
+ uintptr_t data;
+ idm_ffp_disable_t disable_type;
+ boolean_t in_login;
+
+ ic = cn_task->ct_ic;
+ icn = cn_task->ct_icn;
+ data = cn_task->ct_data;
+
+ icp = ic->ic_handle;
+ ASSERT(icp != NULL);
+ isp = icp->conn_sess;
- switch (icp->conn_state) {
- case ISCSI_CONN_STATE_FREE:
- status = iscsi_conn_state_free(icp, event);
+ switch (icn) {
+ case CN_FFP_ENABLED:
+ mutex_enter(&icp->conn_state_mutex);
+ icp->conn_async_logout = B_FALSE;
+ icp->conn_state_ffp = B_TRUE;
+ cv_broadcast(&icp->conn_state_change);
+ mutex_exit(&icp->conn_state_mutex);
+
+ /*
+ * This logic assumes that the IDM login-snooping code
+ * and the initiator login code will agree on whether
+ * the connection is in FFP. The reason we do this
+ * is that we don't want to process CN_FFP_DISABLED until
+ * CN_FFP_ENABLED has been fully handled.
+ */
+ mutex_enter(&icp->conn_login_mutex);
+ while (icp->conn_login_state != LOGIN_FFP) {
+ cv_wait(&icp->conn_login_cv, &icp->conn_login_mutex);
+ }
+ mutex_exit(&icp->conn_login_mutex);
break;
- case ISCSI_CONN_STATE_IN_LOGIN:
- iscsi_conn_state_in_login(icp, event);
- break;
- case ISCSI_CONN_STATE_LOGGED_IN:
- iscsi_conn_state_logged_in(icp, event);
+ case CN_FFP_DISABLED:
+ disable_type = (idm_ffp_disable_t)data;
+
+ mutex_enter(&icp->conn_state_mutex);
+ switch (disable_type) {
+ case FD_SESS_LOGOUT:
+ case FD_CONN_LOGOUT:
+ if (icp->conn_async_logout) {
+ /*
+ * Our logout was in response to an
+ * async logout request so treat this
+ * like a connection failure (we will
+ * try to re-establish the connection)
+ */
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FAILED);
+ } else {
+ /*
+ * Logout due to user config change,
+ * we will not try to re-establish
+ * the connection.
+ */
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_IN_LOGOUT);
+ /*
+ * Hold off generating the ISCSI_SESS_EVENT_N3
+ * event until we get the CN_CONNECT_LOST
+ * notification. This matches the pre-IDM
+ * implementation better.
+ */
+ }
+ break;
+
+ case FD_CONN_FAIL:
+ default:
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FAILED);
+ break;
+ }
+
+ icp->conn_state_ffp = B_FALSE;
+ cv_broadcast(&icp->conn_state_change);
+ mutex_exit(&icp->conn_state_mutex);
+
break;
- case ISCSI_CONN_STATE_IN_LOGOUT:
- iscsi_conn_state_in_logout(icp, event);
- break;
- case ISCSI_CONN_STATE_FAILED:
- iscsi_conn_state_failed(icp, event);
- break;
- case ISCSI_CONN_STATE_POLLING:
- iscsi_conn_state_polling(icp, event);
+ case CN_CONNECT_LOST:
+ /*
+ * We only care about CN_CONNECT_LOST if we've logged in. IDM
+ * sends a flag as the data payload to indicate whether we
+ * were trying to login. The CN_LOGIN_FAIL notification
+ * gives us what we need to know for login failures and
+ * otherwise we will need to keep a bunch of state to know
+ * what CN_CONNECT_LOST means to us.
+ */
+ in_login = (boolean_t)data;
+ if (in_login) {
+ mutex_enter(&icp->conn_state_mutex);
+
+ icp->conn_state_idm_connected = B_FALSE;
+ cv_broadcast(&icp->conn_state_change);
+ mutex_exit(&icp->conn_state_mutex);
+
+ /* Release connect hold from CN_READY_FOR_LOGIN */
+ idm_conn_rele(ic);
+ break;
+ }
+
+ /* Any remaining commands are never going to finish */
+ iscsi_conn_flush_active_cmds(icp);
+
+ /*
+ * The connection is no longer active so cleanup any
+ * references to the connection and release any holds so
+ * that IDM can finish cleanup.
+ */
+ mutex_enter(&icp->conn_state_mutex);
+ if (icp->conn_state != ISCSI_CONN_STATE_FAILED) {
+
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N3);
+ mutex_exit(&isp->sess_state_mutex);
+
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ } else {
+
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp,
+ ISCSI_SESS_EVENT_N5);
+ mutex_exit(&isp->sess_state_mutex);
+
+ /*
+ * If session type is NORMAL, try to reestablish the
+ * connection.
+ */
+ if (isp->sess_type == ISCSI_SESS_TYPE_NORMAL) {
+ iscsi_conn_retry(isp, icp);
+ } else {
+
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp,
+ ISCSI_SESS_EVENT_N6);
+ mutex_exit(&isp->sess_state_mutex);
+
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ }
+ }
+
+ (void) iscsi_thread_stop(icp->conn_tx_thread);
+
+ icp->conn_state_idm_connected = B_FALSE;
+ cv_broadcast(&icp->conn_state_change);
+ mutex_exit(&icp->conn_state_mutex);
+
+ /* Release connect hold from CN_READY_FOR_LOGIN */
+ idm_conn_rele(ic);
break;
default:
- ASSERT(FALSE);
- status = ISCSI_STATUS_INTERNAL_ERROR;
+ ISCSI_CONN_LOG(CE_WARN,
+ "iscsi_client_notify: unknown notification: "
+ "%x: NOT IMPLEMENTED YET: icp: %p ic: %p ",
+ icn, (void *)icp, (void *)ic);
+ break;
}
-
- cv_broadcast(&icp->conn_state_change);
- return (status);
-}
-
+ /* free the task notify structure we allocated in iscsi_client_notify */
+ kmem_free(cn_task, sizeof (*cn_task));
-/*
- * iscsi_conn_state_str - converts state enum to a string
- */
-char *
-iscsi_conn_state_str(iscsi_conn_state_t state)
-{
- switch (state) {
- case ISCSI_CONN_STATE_FREE:
- return ("free");
- case ISCSI_CONN_STATE_IN_LOGIN:
- return ("in_login");
- case ISCSI_CONN_STATE_LOGGED_IN:
- return ("logged_in");
- case ISCSI_CONN_STATE_IN_LOGOUT:
- return ("in_logout");
- case ISCSI_CONN_STATE_FAILED:
- return ("failed");
- case ISCSI_CONN_STATE_POLLING:
- return ("polling");
- default:
- return ("unknown");
- }
+ /* Release the hold we acquired in iscsi_client_notify */
+ idm_conn_rele(ic);
}
-
/*
* iscsi_conn_sync_params - used to update connection parameters
*
@@ -562,7 +686,8 @@
break;
}
if (pp.p_bitmap & (1 << param_id)) {
- switch (param_id) {
+
+ switch (param_id) {
/*
* Boolean parameters
*/
@@ -762,488 +887,6 @@
* +--------------------------------------------------------------------+
*/
-
-/*
- * iscsi_conn_state_free -
- *
- * S1: FREE - State on instantiation, or after successful
- * connection closure.
- */
-static iscsi_status_t
-iscsi_conn_state_free(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp;
- iscsi_hba_t *ihp;
- iscsi_task_t *itp;
- iscsi_status_t status = ISCSI_STATUS_SUCCESS;
-
- ASSERT(icp != NULL);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
- ihp = isp->sess_hba;
- ASSERT(ihp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_FREE);
-
- /* switch on event change */
- switch (event) {
- /* -T1: Transport connection request was request */
- case ISCSI_CONN_EVENT_T1:
- icp->conn_state = ISCSI_CONN_STATE_IN_LOGIN;
-
- /*
- * Release the connection state mutex cross the
- * the dispatch of the login task. The login task
- * will reacquire the connection state mutex when
- * it pushes the connection successful or failed.
- */
- mutex_exit(&icp->conn_state_mutex);
-
- /* start login */
- itp = kmem_zalloc(sizeof (iscsi_task_t), KM_SLEEP);
- itp->t_arg = icp;
- itp->t_blocking = B_TRUE;
-
- /*
- * Sync base connection information before login
- * A login redirection might have shifted the
- * current information from the base.
- */
- bcopy(&icp->conn_base_addr, &icp->conn_curr_addr,
- sizeof (icp->conn_curr_addr));
-
- status = iscsi_login_start(itp);
- kmem_free(itp, sizeof (iscsi_task_t));
-
- mutex_enter(&icp->conn_state_mutex);
- break;
-
- /* All other events are invalid for this state */
- default:
- ASSERT(FALSE);
- status = ISCSI_STATUS_INTERNAL_ERROR;
- }
- return (status);
-}
-
-/*
- * iscsi_conn_state_in_login - During this state we are trying to
- * connect the TCP connection and make a successful login to the
- * target. To complete this we have a task queue item that is
- * trying this processing at this point in time. When the task
- * queue completed its processing it will issue either a T5/7
- * event.
- */
-static void
-iscsi_conn_state_in_login(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp;
-
- ASSERT(icp != NULL);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_IN_LOGIN);
-
- /* switch on event change */
- switch (event) {
- /*
- * -T5: The final iSCSI Login response with a Status-Class of zero
- * was received.
- */
- case ISCSI_CONN_EVENT_T5:
- iscsi_conn_logged_in(isp, icp);
- break;
-
- /*
- * -T30: One of the following event caused the transition:
- * - Thefinal iSCSI Login response was received with a non-zero
- * Status-Class.
- */
- case ISCSI_CONN_EVENT_T30:
- /* FALLTHRU */
-
- /*
- * -T7: One of the following events caused the transition:
- * - Login timed out.
- * - A transport disconnect indication was received.
- * - A transport reset was received.
- * - An internal event indicating a transport timeout was
- * received.
- * - An internal event of receiving a Logout repsonse (success)
- * on another connection for a "close the session" Logout
- * request was received.
- * * In all these cases, the transport connection is closed.
- */
- case ISCSI_CONN_EVENT_T7:
- icp->conn_state = ISCSI_CONN_STATE_FREE;
- break;
-
- /* All other events are invalid for this state */
- default:
- ASSERT(FALSE);
- }
-}
-
-
-/*
- * iscsi_conn_state_logged_in -
- *
- */
-static void
-iscsi_conn_state_logged_in(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp;
- iscsi_hba_t *ihp;
-
- ASSERT(icp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_LOGGED_IN);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
- ihp = isp->sess_hba;
- ASSERT(ihp != NULL);
-
- /* switch on event change */
- switch (event) {
- /*
- * -T8: An internal event of receiving a Logout response (success)
- * on another connection for a "close the session" Logout request
- * was received, thus closing this connection requiring no further
- * cleanup.
- */
- case ISCSI_CONN_EVENT_T8:
- icp->conn_state = ISCSI_CONN_STATE_FREE;
-
- /* stop tx thread */
- (void) iscsi_thread_stop(icp->conn_tx_thread);
-
- /* Disconnect connection */
- iscsi_net->close(icp->conn_socket);
-
- /* Notify session that a connection logged out */
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(icp->conn_sess, ISCSI_SESS_EVENT_N3);
- mutex_exit(&isp->sess_state_mutex);
- break;
-
- /*
- * -T9: An internal event that indicates the readiness to start the
- * Logout process was received, thus prompting an iSCSI Logout
- * to be sent by the initiator.
- */
- case ISCSI_CONN_EVENT_T9:
- /* FALLTHRU */
-
- /*
- * -T11: Aync PDU with AsyncEvent "Request Logout" was recevied
- */
- case ISCSI_CONN_EVENT_T11:
- icp->conn_state = ISCSI_CONN_STATE_IN_LOGOUT;
-
- (void) iscsi_handle_logout(icp);
- break;
-
- /*
- * -T14: One or more of the following events case this transition:
- * - Header Digest Error
- * - Protocol Error
- */
- case ISCSI_CONN_EVENT_T14:
- icp->conn_state = ISCSI_CONN_STATE_FAILED;
-
- /* stop tx thread */
- (void) iscsi_thread_stop(icp->conn_tx_thread);
-
- /*
- * Error Recovery Level 0 states we should drop
- * the connection here. Then we will fall through
- * and treat this event like a T15.
- */
- iscsi_net->close(icp->conn_socket);
-
- /* FALLTHRU */
-
- /*
- * -T15: One or more of the following events caused this transition
- * - Internal event that indicates a transport connection timeout
- * was received thus prompting transport RESET or transport
- * connection closure.
- * - A transport RESET
- * - A transport disconnect indication.
- * - Async PDU with AsyncEvent "Drop connection" (for this CID)
- * - Async PDU with AsyncEvent "Drop all connections"
- */
- case ISCSI_CONN_EVENT_T15:
- icp->conn_state = ISCSI_CONN_STATE_FAILED;
-
- /* stop tx thread, no-op if already done for T14 */
- (void) iscsi_thread_stop(icp->conn_tx_thread);
-
- iscsi_conn_flush_active_cmds(icp);
-
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N5);
- mutex_exit(&isp->sess_state_mutex);
-
- /*
- * If session type is NORMAL, create a new login task
- * to get this connection reestablished.
- */
- if (isp->sess_type == ISCSI_SESS_TYPE_NORMAL) {
- iscsi_conn_retry(isp, icp);
- } else {
- icp->conn_state = ISCSI_CONN_STATE_FREE;
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N6);
- mutex_exit(&isp->sess_state_mutex);
- }
- break;
-
- /* All other events are invalid for this state */
- default:
- ASSERT(FALSE);
- }
-}
-
-
-/*
- * iscsi_conn_state_in_logout -
- *
- */
-static void
-iscsi_conn_state_in_logout(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_IN_LOGOUT);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- /* switch on event change */
- switch (event) {
- /*
- * -T11: Async PDU with AsyncEvent "Request Logout" was received again
- */
- case ISCSI_CONN_EVENT_T11:
- icp->conn_state = ISCSI_CONN_STATE_IN_LOGOUT;
-
- /* Already in LOGOUT ignore the request */
- break;
-
- /*
- * -T17: One or more of the following events caused this transition:
- * - Logout response, (failure i.e., a non-zero status) was
- * received, or logout timed out.
- * - Any of the events specified for T15
- *
- * -T14: One or more of the following events case this transition:
- * - Header Digest Error
- * - Protocol Error
- *
- * -T15: One or more of the following events caused this transition
- * - Internal event that indicates a transport connection timeout
- * was received thus prompting transport RESET or transport
- * connection closure.
- * - A transport RESET
- * - A transport disconnect indication.
- * - Async PDU with AsyncEvent "Drop connection" (for this CID)
- * - Async PDU with AsyncEvent "Drop all connections"
- */
- case ISCSI_CONN_EVENT_T17:
- case ISCSI_CONN_EVENT_T14:
- case ISCSI_CONN_EVENT_T15:
- icp->conn_state = ISCSI_CONN_STATE_FREE;
-
- /* stop tx thread */
- (void) iscsi_thread_stop(icp->conn_tx_thread);
-
- /* Disconnect Connection */
- iscsi_net->close(icp->conn_socket);
-
- iscsi_conn_flush_active_cmds(icp);
-
- /* Notify session of a failed logout */
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(icp->conn_sess, ISCSI_SESS_EVENT_N3);
- mutex_exit(&isp->sess_state_mutex);
- break;
-
- /* All other events are invalid for this state */
- default:
- ASSERT(FALSE);
- }
-}
-
-
-/*
- * iscsi_conn_state_failed -
- *
- */
-static void
-iscsi_conn_state_failed(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp;
-
- ASSERT(icp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_FAILED);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- /* switch on event change */
- switch (event) {
-
- /*
- * -T5: The final iSCSI Login response with a Status-Class of zero
- * was received.
- */
- case ISCSI_CONN_EVENT_T5:
- iscsi_conn_logged_in(isp, icp);
- break;
-
- /*
- * -T30: One of the following event caused the transition:
- * - Thefinal iSCSI Login response was received with a non-zero
- * Status-Class.
- */
- case ISCSI_CONN_EVENT_T30:
- icp->conn_state = ISCSI_CONN_STATE_FREE;
-
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N6);
- mutex_exit(&isp->sess_state_mutex);
-
- break;
-
- /*
- * -T7: One of the following events caused the transition:
- * - Login timed out.
- * - A transport disconnect indication was received.
- * - A transport reset was received.
- * - An internal event indicating a transport timeout was
- * received.
- * - An internal event of receiving a Logout repsonse (success)
- * on another connection for a "close the session" Logout
- * request was received.
- * * In all these cases, the transport connection is closed.
- */
- case ISCSI_CONN_EVENT_T7:
- icp->conn_state = ISCSI_CONN_STATE_POLLING;
-
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N6);
- mutex_exit(&isp->sess_state_mutex);
-
- iscsi_conn_retry(isp, icp);
- break;
-
- /* There are no valid transition out of this state. */
- default:
- ASSERT(FALSE);
- }
-}
-
-/*
- * iscsi_conn_state_polling -
- *
- * S6: POLLING - State on instantiation, or after successful
- * connection closure.
- */
-static void
-iscsi_conn_state_polling(iscsi_conn_t *icp, iscsi_conn_event_t event)
-{
- iscsi_sess_t *isp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(icp->conn_state == ISCSI_CONN_STATE_POLLING);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- /* switch on event change */
- switch (event) {
- /*
- * -T5: The final iSCSI Login response with a Status-Class of zero
- * was received.
- */
- case ISCSI_CONN_EVENT_T5:
- iscsi_conn_logged_in(isp, icp);
- break;
-
- /*
- * -T30: One of the following event caused the transition:
- * - Thefinal iSCSI Login response was received with a non-zero
- * Status-Class.
- */
- case ISCSI_CONN_EVENT_T30:
- icp->conn_state = ISCSI_CONN_STATE_FREE;
-
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N6);
- mutex_exit(&isp->sess_state_mutex);
-
- break;
-
- /*
- * -T7: One of the following events caused the transition:
- * - Login timed out.
- * - A transport disconnect indication was received.
- * - A transport reset was received.
- * - An internal event indicating a transport timeout was
- * received.
- * - An internal event of receiving a Logout repsonse (success)
- * on another connection for a "close the session" Logout
- * request was received.
- * * In all these cases, the transport connection is closed.
- */
- case ISCSI_CONN_EVENT_T7:
- /*
- * If session type is NORMAL, create a new login task
- * to get this connection reestablished.
- */
- if (isp->sess_type == ISCSI_SESS_TYPE_NORMAL) {
- iscsi_conn_retry(isp, icp);
- } else {
- icp->conn_state = ISCSI_CONN_STATE_FREE;
- }
- break;
-
- /* All other events are invalid for this state */
- default:
- ASSERT(FALSE);
- }
-}
-
-/*
- * iscsi_conn_event_str - converts event enum to a string
- */
-static char *
-iscsi_conn_event_str(iscsi_conn_event_t event)
-{
- switch (event) {
- case ISCSI_CONN_EVENT_T1:
- return ("T1");
- case ISCSI_CONN_EVENT_T5:
- return ("T5");
- case ISCSI_CONN_EVENT_T7:
- return ("T7");
- case ISCSI_CONN_EVENT_T8:
- return ("T8");
- case ISCSI_CONN_EVENT_T9:
- return ("T9");
- case ISCSI_CONN_EVENT_T11:
- return ("T11");
- case ISCSI_CONN_EVENT_T14:
- return ("T14");
- case ISCSI_CONN_EVENT_T15:
- return ("T15");
- case ISCSI_CONN_EVENT_T17:
- return ("T17");
- case ISCSI_CONN_EVENT_T30:
- return ("T30");
-
- default:
- return ("unknown");
- }
-}
-
/*
* iscsi_conn_flush_active_cmds - flush all active icmdps
* for a connection.
@@ -1273,52 +916,31 @@
icmdp = icp->conn_queue_active.head;
}
+ /* Wait for active queue to drain */
+ while (icp->conn_queue_active.count) {
+ mutex_exit(&icp->conn_queue_active.mutex);
+ delay(drv_usectohz(100000));
+ mutex_enter(&icp->conn_queue_active.mutex);
+ }
+
if (lock_held == B_FALSE) {
mutex_exit(&icp->conn_queue_active.mutex);
}
-}
-
-/*
- * iscsi_conn_logged_in - connection has successfully logged in
- */
-static void
-iscsi_conn_logged_in(iscsi_sess_t *isp, iscsi_conn_t *icp)
-{
- ASSERT(isp != NULL);
- ASSERT(icp != NULL);
-
- icp->conn_state = ISCSI_CONN_STATE_LOGGED_IN;
- /*
- * We need to drop the connection state lock
- * before updating the session state. On update
- * of the session state it will enumerate the
- * target. If we hold the lock during enumeration
- * will block the watchdog thread from timing
- * a scsi_pkt, if required. This will lead to
- * a possible hang condition.
- *
- * Also the lock is no longer needed once the
- * connection state was updated.
- */
- mutex_exit(&icp->conn_state_mutex);
-
- /* startup threads */
- (void) iscsi_thread_start(icp->conn_rx_thread);
- (void) iscsi_thread_start(icp->conn_tx_thread);
-
- /* Notify the session that a connection is logged in */
- mutex_enter(&isp->sess_state_mutex);
- iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N1);
- mutex_exit(&isp->sess_state_mutex);
-
- mutex_enter(&icp->conn_state_mutex);
+ /* Wait for IDM abort queue to drain (if necessary) */
+ mutex_enter(&icp->conn_queue_idm_aborting.mutex);
+ while (icp->conn_queue_idm_aborting.count) {
+ mutex_exit(&icp->conn_queue_idm_aborting.mutex);
+ delay(drv_usectohz(100000));
+ mutex_enter(&icp->conn_queue_idm_aborting.mutex);
+ }
+ mutex_exit(&icp->conn_queue_idm_aborting.mutex);
}
/*
* iscsi_conn_retry - retry connect/login
*/
-static void
+void
iscsi_conn_retry(iscsi_sess_t *isp, iscsi_conn_t *icp)
{
iscsi_task_t *itp;
@@ -1331,6 +953,10 @@
ISCSI_CONN_DEFAULT_LOGIN_MIN,
ISCSI_CONN_DEFAULT_LOGIN_MAX);
+ /* Debug trace: the iSCSI connection (icp) and its conn_ic pointer (ic) */
+ ISCSI_CONN_LOG(CE_NOTE, "DEBUG: iscsi_conn_retry: icp: %p ic: %p ",
+ (void *)icp,
+ (void *)icp->conn_ic);
+
/*
* Sync base connection information before login.
* A login redirection might have shifted the
@@ -1347,15 +973,54 @@
(void(*)())iscsi_login_start, itp, DDI_SLEEP) !=
DDI_SUCCESS) {
kmem_free(itp, sizeof (iscsi_task_t));
- cmn_err(CE_WARN,
- "iscsi connection(%u) failure - "
- "unable to schedule login task",
- icp->conn_oid);
+ cmn_err(CE_WARN, "iscsi connection(%u) failure - "
+ "unable to schedule login task", icp->conn_oid);
- icp->conn_state = ISCSI_CONN_STATE_FREE;
+ iscsi_conn_update_state(icp, ISCSI_CONN_STATE_FREE);
mutex_enter(&isp->sess_state_mutex);
iscsi_sess_state_machine(isp,
ISCSI_SESS_EVENT_N6);
mutex_exit(&isp->sess_state_mutex);
}
}
+
+/*
+ * iscsi_conn_update_state - change the connection state from a context
+ * that does not already own conn_state_mutex.  The mutex is taken, the
+ * work is handed to iscsi_conn_update_state_locked(), and the mutex is
+ * dropped again.
+ */
+void
+iscsi_conn_update_state(iscsi_conn_t *icp, iscsi_conn_state_t next_state)
+{
+	mutex_enter(&icp->conn_state_mutex);
+	iscsi_conn_update_state_locked(icp, next_state);
+	mutex_exit(&icp->conn_state_mutex);
+}
+
+/*
+ * iscsi_conn_update_state_locked - move the connection to next_state.
+ *
+ * The caller must own icp->conn_state_mutex.  A next_state larger than
+ * ISCSI_CONN_STATE_MAX is clamped to ISCSI_CONN_STATE_MAX.  The requested
+ * transition is always passed to idm_sm_audit_state_change().  For one of
+ * the known states the current state is saved in conn_prev_state,
+ * conn_state is updated and waiters on conn_state_change are woken.  Any
+ * other value is reported with a warning and trips an ASSERT; the
+ * connection state is left unchanged in that case.
+ */
+void
+iscsi_conn_update_state_locked(iscsi_conn_t *icp,
+    iscsi_conn_state_t next_state)
+{
+	ASSERT(mutex_owned(&icp->conn_state_mutex));
+
+	/* Clamp out-of-range requests before they are audited */
+	if (next_state > ISCSI_CONN_STATE_MAX) {
+		next_state = ISCSI_CONN_STATE_MAX;
+	}
+
+	idm_sm_audit_state_change(&icp->conn_state_audit,
+	    SAS_ISCSI_CONN, icp->conn_state, next_state);
+
+	/* Reject anything that is not a known connection state */
+	switch (next_state) {
+	case ISCSI_CONN_STATE_FREE:
+	case ISCSI_CONN_STATE_IN_LOGIN:
+	case ISCSI_CONN_STATE_LOGGED_IN:
+	case ISCSI_CONN_STATE_IN_LOGOUT:
+	case ISCSI_CONN_STATE_FAILED:
+	case ISCSI_CONN_STATE_POLLING:
+		break;
+	default:
+		cmn_err(CE_WARN, "Update state found illegal state: %x "
+		    "prev_state: %x", next_state, icp->conn_prev_state);
+		ASSERT(0);
+		return;
+	}
+
+	ISCSI_CONN_LOG(CE_NOTE,
+	    "iscsi_conn_update_state conn %p %s(%d) -> %s(%d)",
+	    (void *)icp,
+	    iscsi_ics_name[icp->conn_state], icp->conn_state,
+	    iscsi_ics_name[next_state], next_state);
+
+	/* Remember where we came from, switch, and wake any waiters */
+	icp->conn_prev_state = icp->conn_state;
+	icp->conn_state = next_state;
+	cv_broadcast(&icp->conn_state_change);
+}
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c Tue Mar 24 17:50:49 2009 -0600
@@ -37,44 +37,61 @@
#include "iscsi.h" /* iscsi driver */
#include <sys/iscsi_protocol.h> /* iscsi protocol */
+/*
+ * NOTE(review): presumably the target transfer tag value the initiator
+ * uses for its own tasks; no use site is visible in this part of the
+ * patch -- confirm where it is referenced.
+ */
+#define ISCSI_INI_TASK_TTT 0xffffffff
+
+/*
+ * NOTE(review): presumably the run-time switch for the ISCSI_IO_LOG debug
+ * messages used in this file; the macro definition is not visible here --
+ * confirm in iscsi.h.  Off by default.
+ */
+boolean_t iscsi_io_logging = B_FALSE;
+
+/*
+ * ISCSI_CHECK_SCSI_READ - optional data pattern check for completed reads.
+ *
+ * Does nothing unless idm_pattern_checking is set.  When it is, and the
+ * response header reports response == 0 and cmd_status == 0, and the first
+ * CDB byte of the command's scsi_pkt is one of SCMD_READ, SCMD_READ_G1,
+ * SCMD_READ_G4 or SCMD_READ_G5, IDM_BUFPAT_CHECK is run on the command's
+ * cmd_un.scsi.ibp_ibuf buffer.
+ *
+ *	ICHK_CMD	pointer to the command (cmd_un.scsi is read)
+ *	ICHK_HDR	pointer to the response header
+ *	ICHK_LEN	passed through to IDM_BUFPAT_CHECK
+ *	ICHK_TYPE	passed through to IDM_BUFPAT_CHECK
+ *
+ * NOTE(review): the expansion is a bare if-block, not do { } while (0),
+ * and it declares locals named pkt and idb.  Use it only as a complete
+ * statement (never as the unbraced body of an if/else), and do not pass
+ * argument expressions that themselves mention pkt or idb.
+ */
+#define ISCSI_CHECK_SCSI_READ(ICHK_CMD, ICHK_HDR, ICHK_LEN, ICHK_TYPE) \
+ if (idm_pattern_checking) { \
+ struct scsi_pkt *pkt = (ICHK_CMD)->cmd_un.scsi.pkt; \
+ if (((ICHK_HDR)->response == 0) && \
+ ((ICHK_HDR)->cmd_status == 0) && \
+ ((pkt->pkt_cdbp[0] == SCMD_READ_G1) || \
+ (pkt->pkt_cdbp[0] == SCMD_READ_G4) || \
+ (pkt->pkt_cdbp[0] == SCMD_READ) || \
+ (pkt->pkt_cdbp[0] == SCMD_READ_G5))) { \
+ idm_buf_t *idb = (ICHK_CMD)->cmd_un.scsi.ibp_ibuf; \
+ IDM_BUFPAT_CHECK(idb, ICHK_LEN, ICHK_TYPE); \
+ } \
+ }
+
/* generic io helpers */
static uint32_t n2h24(uchar_t *ptr);
static int iscsi_sna_lt(uint32_t n1, uint32_t n2);
-static void iscsi_update_flow_control(iscsi_sess_t *isp,
+void iscsi_update_flow_control(iscsi_sess_t *isp,
uint32_t max, uint32_t exp);
+static iscsi_status_t iscsi_rx_process_scsi_itt_to_icmdp(iscsi_sess_t *isp,
+ idm_conn_t *ic, iscsi_scsi_rsp_hdr_t *ihp, iscsi_cmd_t **icmdp);
+static iscsi_status_t iscsi_rx_process_itt_to_icmdp(iscsi_sess_t *isp,
+ iscsi_hdr_t *ihp, iscsi_cmd_t **icmdp);
+static void iscsi_process_rsp_status(iscsi_sess_t *isp, iscsi_conn_t *icp,
+ idm_status_t status);
+static void iscsi_drop_conn_cleanup(iscsi_conn_t *icp);
+
+/* callbacks from idm */
+static idm_pdu_cb_t iscsi_tx_done;
/* receivers */
-static iscsi_status_t iscsi_rx_process_hdr(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data, int data_size);
-static iscsi_status_t iscsi_rx_process_nop(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_data_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp);
-static iscsi_status_t iscsi_rx_process_cmd_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_rtt_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_reject_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_rejected_tsk_mgt(iscsi_conn_t *icp,
+static idm_status_t iscsi_rx_process_nop(idm_conn_t *ic, idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_data_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_cmd_rsp(idm_conn_t *ic, idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_reject_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
+
+static idm_status_t iscsi_rx_process_rejected_tsk_mgt(idm_conn_t *ic,
iscsi_hdr_t *old_ihp);
-static iscsi_status_t iscsi_rx_process_itt_to_icmdp(iscsi_sess_t *isp,
- iscsi_hdr_t *ihp, iscsi_cmd_t **icmdp);
-static iscsi_status_t iscsi_rx_process_task_mgt_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, void *data);
-static iscsi_status_t iscsi_rx_process_logout_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_async_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-static iscsi_status_t iscsi_rx_process_text_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data);
-
+static idm_status_t iscsi_rx_process_task_mgt_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_logout_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_async_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
+static idm_status_t iscsi_rx_process_text_rsp(idm_conn_t *ic,
+ idm_pdu_t *pdu);
/* senders */
static iscsi_status_t iscsi_tx_scsi(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
-static iscsi_status_t iscsi_tx_r2t(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
-static iscsi_status_t iscsi_tx_data(iscsi_sess_t *isp, iscsi_conn_t *icp,
- iscsi_cmd_t *icmdp, uint32_t ttt, size_t datalen, size_t offset);
static iscsi_status_t iscsi_tx_nop(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
static iscsi_status_t iscsi_tx_abort(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
static iscsi_status_t iscsi_tx_reset(iscsi_sess_t *isp, iscsi_cmd_t *icmdp);
@@ -83,19 +100,13 @@
/* helpers */
-static void iscsi_handle_r2t(iscsi_conn_t *icp, iscsi_cmd_t *icmdp,
- uint32_t offset, uint32_t length, uint32_t ttt);
+static void iscsi_logout_start(void *arg);
static void iscsi_handle_passthru_callback(struct scsi_pkt *pkt);
static void iscsi_handle_nop(iscsi_conn_t *icp, uint32_t itt, uint32_t ttt);
static void iscsi_timeout_checks(iscsi_sess_t *isp);
static void iscsi_nop_checks(iscsi_sess_t *isp);
-
-#define ISCSI_CONN_TO_NET_DIGEST(icp) \
- ((icp->conn_params.header_digest ? ISCSI_NET_HEADER_DIGEST : 0) | \
- (icp->conn_params.data_digest ? ISCSI_NET_DATA_DIGEST : 0))
-
/*
* This file contains the main guts of the iSCSI protocol layer.
* It's broken into 5 sections; Basic helper functions, RX IO path,
@@ -198,7 +209,7 @@
* iscsi_update_flow_control - Update expcmdsn and maxcmdsn iSCSI
* flow control information for a session
*/
-static void
+void
iscsi_update_flow_control(iscsi_sess_t *isp, uint32_t max, uint32_t exp)
{
ASSERT(isp != NULL);
@@ -232,268 +243,613 @@
*/
/*
- * iscsi_rx_thread - The connection creates a thread of this
- * function during login. After which point this thread is
- * used to receive and process all iSCSI PDUs on this connection.
- * The PDUs received on this connection are used to drive the
- * commands through their state machine. This thread will
- * continue processing while the connection is on a LOGGED_IN
- * or IN_LOGOUT state. Once the connection moves out of this
- * state the thread will die.
+ * iscsi_rx_scsi_rsp - called from idm
+ * For each opcode type fan out the processing.
*/
void
-iscsi_rx_thread(iscsi_thread_t *thread, void *arg)
+iscsi_rx_scsi_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_conn_t *icp = (iscsi_conn_t *)arg;
- iscsi_sess_t *isp = NULL;
- char *hdr = NULL;
- int hdr_size = 0;
- char *data = NULL;
- int data_size = 0;
- iscsi_hdr_t *ihp;
-
+ iscsi_conn_t *icp;
+ iscsi_sess_t *isp;
+ iscsi_hdr_t *ihp;
+ idm_status_t status;
+
+ ASSERT(ic != NULL);
+ ASSERT(pdu != NULL);
+ icp = ic->ic_handle;
ASSERT(icp != NULL);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- /* pre-alloc recv header buffer for common actions */
- hdr_size = sizeof (iscsi_hdr_t) + 255; /* 255 = one byte hlength */
- hdr = (char *)kmem_zalloc(hdr_size, KM_SLEEP);
- ihp = (iscsi_hdr_t *)hdr;
+ ihp = (iscsi_hdr_t *)pdu->isp_hdr;
ASSERT(ihp != NULL);
-
- /* pre-alloc max_recv_size buffer for common actions */
- data_size = icp->conn_params.max_recv_data_seg_len;
- data = (char *)kmem_zalloc(data_size, KM_SLEEP);
- ASSERT(data != NULL);
-
- do {
- /* Wait for the next iSCSI header */
- rval = iscsi_net->recvhdr(icp->conn_socket,
- ihp, hdr_size, 0, (icp->conn_params.header_digest ?
- ISCSI_NET_HEADER_DIGEST : 0));
- if (ISCSI_SUCCESS(rval)) {
- isp->sess_rx_lbolt =
- icp->conn_rx_lbolt =
- ddi_get_lbolt();
-
- /* Perform specific hdr handling */
- rval = iscsi_rx_process_hdr(icp, ihp,
- data, data_size);
- }
-
- /*
- * handle failures
- */
- switch (rval) {
- case ISCSI_STATUS_SUCCESS:
- /*
- * If we successfully completed a receive
- * and we are in an IN_FLUSH state then
- * check the active queue count to see
- * if its empty. If its empty then force
- * a disconnect event on the connection.
- * This will move the session from IN_FLUSH
- * to FLUSHED and complete the login
- * parameter update.
- */
- if ((isp->sess_state == ISCSI_SESS_STATE_IN_FLUSH) &&
- (icp->conn_queue_active.count == 0)) {
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp,
- ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
- }
- break;
- case ISCSI_STATUS_TCP_RX_ERROR:
- /* connection had an error */
- mutex_enter(&icp->conn_state_mutex);
- /*
- * recvmsg may return after the closing of socket
- * with this error
- */
- if (ISCSI_CONN_STATE_FULL_FEATURE(icp->conn_state)) {
- (void) iscsi_conn_state_machine(icp,
- ISCSI_CONN_EVENT_T15);
- }
- mutex_exit(&icp->conn_state_mutex);
- break;
- case ISCSI_STATUS_HEADER_DIGEST_ERROR:
- /*
- * If we encounter a digest error we have to restart
- * all the connections on this session. per iSCSI
- * Level 0 Recovery.
- */
- KSTAT_INC_CONN_ERR_HEADER_DIGEST(icp);
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp,
- ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
- break;
- case ISCSI_STATUS_DATA_DIGEST_ERROR:
- /*
- * We can continue with a data digest error. The
- * icmdp was flaged as having a crc problem. It
- * will be aborted when all data is received. This
- * saves us from restarting the session when we
- * might be able to keep it going. If the data
- * digest issue was really bad we will hit a
- * status protocol error on the next pdu, which
- * will force a connection retstart.
- */
- KSTAT_INC_CONN_ERR_DATA_DIGEST(icp);
- break;
- case ISCSI_STATUS_PROTOCOL_ERROR:
- /*
- * A protocol problem was encountered. Reset
- * session to try and repair issue.
- */
- KSTAT_INC_CONN_ERR_PROTOCOL(icp);
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp,
- ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
- break;
- case ISCSI_STATUS_INTERNAL_ERROR:
- /*
- * These should have all been handled before now.
- */
- break;
- default:
- cmn_err(CE_WARN, "iscsi connection(%u) encountered "
- "unknown error(%d) on a receive", icp->conn_oid,
- rval);
- ASSERT(B_FALSE);
- }
-
- } while ((ISCSI_CONN_STATE_FULL_FEATURE(icp->conn_state)) &&
- (iscsi_thread_wait(thread, 0) != 0));
-
- kmem_free(hdr, hdr_size);
- kmem_free(data, data_size);
-}
-
-
-/*
- * iscsi_rx_process_hdr - This function collects data for all PDUs
- * that do not have data that will be mapped to a specific scsi_pkt.
- * Then for each hdr type fan out the processing.
- */
-static iscsi_status_t
-iscsi_rx_process_hdr(iscsi_conn_t *icp, iscsi_hdr_t *ihp,
- char *data, int data_size)
-{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_sess_t *isp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
- ASSERT(data != NULL);
- isp = icp->conn_sess;
+ isp = icp->conn_sess;
ASSERT(isp != NULL);
- /* If this is not a SCSI_DATA_RSP we can go ahead and get the data */
- if ((ihp->opcode & ISCSI_OPCODE_MASK) != ISCSI_OP_SCSI_DATA_RSP) {
- rval = iscsi_net->recvdata(icp->conn_socket, ihp,
- data, data_size, 0, (icp->conn_params.data_digest) ?
- ISCSI_NET_DATA_DIGEST : 0);
- if (!ISCSI_SUCCESS(rval)) {
- return (rval);
- }
- isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
- }
+ /* reset the session timer when we receive the response */
+ isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
/* fan out the hdr processing */
switch (ihp->opcode & ISCSI_OPCODE_MASK) {
case ISCSI_OP_SCSI_DATA_RSP:
- rval = iscsi_rx_process_data_rsp(icp, ihp);
+ status = iscsi_rx_process_data_rsp(ic, pdu);
break;
case ISCSI_OP_SCSI_RSP:
- rval = iscsi_rx_process_cmd_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_RTT_RSP:
- rval = iscsi_rx_process_rtt_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_NOOP_IN:
- rval = iscsi_rx_process_nop(icp, ihp, data);
- break;
- case ISCSI_OP_REJECT_MSG:
- rval = iscsi_rx_process_reject_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_SCSI_TASK_MGT_RSP:
- rval = iscsi_rx_process_task_mgt_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_LOGOUT_RSP:
- rval = iscsi_rx_process_logout_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_ASYNC_EVENT:
- rval = iscsi_rx_process_async_rsp(icp, ihp, data);
- break;
- case ISCSI_OP_TEXT_RSP:
- rval = iscsi_rx_process_text_rsp(icp, ihp, data);
+ status = iscsi_rx_process_cmd_rsp(ic, pdu);
+ idm_pdu_complete(pdu, status);
break;
default:
cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received an unsupported opcode 0x%02x",
+ "received pdu with unsupported opcode 0x%02x",
icp->conn_oid, ihp->opcode);
- rval = ISCSI_STATUS_PROTOCOL_ERROR;
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ }
+ iscsi_process_rsp_status(isp, icp, status);
+}
+
+/*
+ * iscsi_task_cleanup - release the IDM resources attached to a SCSI
+ * command and complete its IDM task.
+ *
+ * opcode is only sanity-checked and logged; it must be
+ * ISCSI_OP_SCSI_DATA_RSP or ISCSI_OP_SCSI_RSP.  If the command carries a
+ * buf with a non-zero b_bcount, the IDM buffer matching its direction
+ * (ibp_ibuf for B_READ, ibp_obuf otherwise) is unbound from the task,
+ * freed, and its pointer in icmdp is cleared.  idm_task_done() is then
+ * called on the task in every case.
+ */
+void
+iscsi_task_cleanup(int opcode, iscsi_cmd_t *icmdp)
+{
+	idm_task_t	*task;
+	struct buf	*bufp;
+	idm_buf_t	*in_buf;
+	idm_buf_t	*out_buf;
+
+	task = icmdp->cmd_itp;
+	ASSERT(task != NULL);
+	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
+	    (opcode == ISCSI_OP_SCSI_RSP));
+
+	bufp = icmdp->cmd_un.scsi.bp;
+	in_buf = icmdp->cmd_un.scsi.ibp_ibuf;
+	out_buf = icmdp->cmd_un.scsi.ibp_obuf;
+	ISCSI_IO_LOG(CE_NOTE, "DEBUG: task_cleanup: itp: %p opcode: %d "
+	    "icmdp: %p bp: %p ibp: %p", (void *)task, opcode,
+	    (void *)icmdp, (void *)bufp, (void *)in_buf);
+
+	/* Only commands that actually moved data have a buffer to drop */
+	if (bufp != NULL && bufp->b_bcount != 0) {
+		if ((bufp->b_flags & B_READ) != 0) {
+			/* Read: tear down the inbound buffer, if any */
+			if (in_buf != NULL) {
+				idm_buf_unbind_in(task, in_buf);
+				idm_buf_free(in_buf);
+				icmdp->cmd_un.scsi.ibp_ibuf = NULL;
+			}
+		} else if (out_buf != NULL) {
+			/* Write: tear down the outbound buffer */
+			idm_buf_unbind_out(task, out_buf);
+			idm_buf_free(out_buf);
+			icmdp->cmd_un.scsi.ibp_obuf = NULL;
+		}
+	}
+
+	idm_task_done(task);
+}
+
+/*
+ * iscsi_rx_chk - common checks for a received response header.
+ *
+ * Everything below runs under isp->sess_cmdsn_mutex:
+ *  1. The header's statsn (network byte order) must equal the
+ *     connection's conn_expstatsn, which is then incremented.  A mismatch
+ *     is logged and fails the check.
+ *  2. The command the response belongs to is looked up and returned
+ *     through *icmdp, using the SCSI-specific lookup for
+ *     ISCSI_OP_SCSI_DATA_RSP / ISCSI_OP_SCSI_RSP and the generic lookup
+ *     for every other opcode.  A failed lookup fails the check; note that
+ *     conn_expstatsn has already been advanced at that point.
+ *  3. Session flow control is updated from the header's maxcmdsn and
+ *     expcmdsn.
+ *
+ * Returns IDM_STATUS_SUCCESS when all three steps succeed, otherwise
+ * IDM_STATUS_PROTOCOL_ERROR.
+ */
+idm_status_t
+iscsi_rx_chk(iscsi_conn_t *icp, iscsi_sess_t *isp,
+    iscsi_scsi_rsp_hdr_t *irhp, iscsi_cmd_t **icmdp)
+{
+	idm_status_t	result = IDM_STATUS_PROTOCOL_ERROR;
+	iscsi_status_t	lookup;
+
+	mutex_enter(&isp->sess_cmdsn_mutex);
+
+	if (icp->conn_expstatsn != ntohl(irhp->statsn)) {
+		cmn_err(CE_WARN, "iscsi connection(%u/%x) protocol error - "
+		    "received status out of order itt:0x%x statsn:0x%x "
+		    "expstatsn:0x%x", icp->conn_oid, irhp->opcode,
+		    irhp->itt, ntohl(irhp->statsn), icp->conn_expstatsn);
+		goto done;
+	}
+	icp->conn_expstatsn++;
+
+	/* get icmdp so the caller can clean up on error */
+	switch (irhp->opcode) {
+	case ISCSI_OP_SCSI_DATA_RSP:
+	case ISCSI_OP_SCSI_RSP:
+		lookup = iscsi_rx_process_scsi_itt_to_icmdp(isp,
+		    icp->conn_ic, irhp, icmdp);
+		break;
+	default:
+		lookup = iscsi_rx_process_itt_to_icmdp(isp,
+		    (iscsi_hdr_t *)irhp, icmdp);
+		break;
+	}
+
+	if (ISCSI_SUCCESS(lookup)) {
+		/* update expcmdsn and maxcmdsn */
+		iscsi_update_flow_control(isp, ntohl(irhp->maxcmdsn),
+		    ntohl(irhp->expcmdsn));
+		result = IDM_STATUS_SUCCESS;
+	}
+
+done:
+	mutex_exit(&isp->sess_cmdsn_mutex);
+	return (result);
+}
+
+/*
+ * iscsi_cmd_rsp_chk - fill in the residual count and the completion
+ * reason, state and statistics of a command's scsi_pkt from its SCSI
+ * response header.
+ *
+ *	icmdp	SCSI command being completed; its scsi_pkt is updated
+ *	issrhp	SCSI response header received for the command
+ *
+ * pkt_resid starts at zero.  When the command has a buf and the amount of
+ * data transferred differs from b_bcount, the residual comes from the
+ * header's residual_count if the underflow flag is set; otherwise, if
+ * fewer bytes arrived than were requested, it is the shortfall
+ * (b_bcount - data_transferred).
+ *
+ * If no CRC error was recorded on the command the packet is completed
+ * with CMD_CMPLT.  Otherwise it is failed with CMD_PER_FAIL / STAT_PERR
+ * and the whole buffer is reported as residual.
+ */
+static void
+iscsi_cmd_rsp_chk(iscsi_cmd_t *icmdp, iscsi_scsi_rsp_hdr_t *issrhp)
+{
+	struct scsi_pkt		*pkt;
+	size_t			data_transferred;
+
+	pkt = icmdp->cmd_un.scsi.pkt;
+	pkt->pkt_resid = 0;
+	data_transferred = icmdp->cmd_un.scsi.data_transferred;
+	/* Check the residual count */
+	if ((icmdp->cmd_un.scsi.bp) &&
+	    (data_transferred != icmdp->cmd_un.scsi.bp->b_bcount)) {
+		/*
+		 * We didn't xfer the expected amount of data -
+		 * the residual_count in the header is only
+		 * valid if the underflow flag is set.
+		 */
+		if (issrhp->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+			pkt->pkt_resid = ntohl(issrhp->residual_count);
+		} else {
+			if (icmdp->cmd_un.scsi.bp->b_bcount >
+			    data_transferred) {
+				/*
+				 * Some data fell on the floor
+				 * somehow - probably a CRC error
+				 */
+				pkt->pkt_resid =
+				    icmdp->cmd_un.scsi.bp->b_bcount -
+				    data_transferred;
+			}
+		}
+		/* Trailing space keeps "itt" apart from the next literal */
+		ISCSI_IO_LOG(CE_NOTE,
+		    "DEBUG: iscsi_rx_cmd_rsp_chk: itt: %u "
+		    "data_trans != b_count data_transferred: %lu "
+		    "b_count: %lu cmd_status: %d flags: %d resid: %lu",
+		    issrhp->itt, data_transferred,
+		    icmdp->cmd_un.scsi.bp->b_bcount,
+		    issrhp->cmd_status & STATUS_MASK,
+		    issrhp->flags, pkt->pkt_resid);
+	}
+	/* set flags that tell SCSA that the command is complete */
+	if (icmdp->cmd_crc_error_seen == B_FALSE) {
+		/* Set successful completion */
+		pkt->pkt_reason = CMD_CMPLT;
+		if (icmdp->cmd_un.scsi.bp) {
+			pkt->pkt_state |= (STATE_XFERRED_DATA |
+			    STATE_GOT_STATUS);
+		} else {
+			pkt->pkt_state |= STATE_GOT_STATUS;
+		}
+	} else {
+		/*
+		 * A CRC error was seen on some of the data for this
+		 * command: fail the packet (CMD_PER_FAIL / STAT_PERR)
+		 * and report the whole buffer as residual.
+		 */
+		pkt->pkt_reason = CMD_PER_FAIL;
+		pkt->pkt_statistics |= STAT_PERR;
+		if (icmdp->cmd_un.scsi.bp) {
+			pkt->pkt_resid =
+			    icmdp->cmd_un.scsi.bp->b_bcount;
+		} else {
+			pkt->pkt_resid = 0;
+		}
+	}
-
- return (rval);
}
+/* Translate the response PDU's SCSI status (and any sense data) into pkt. */
+static void
+iscsi_cmd_rsp_cmd_status(iscsi_cmd_t *icmdp, iscsi_scsi_rsp_hdr_t *issrhp,
+ uint8_t *data)
+{
+ uint32_t dlength = 0;
+ struct scsi_arq_status *arqstat = NULL;
+ size_t senselen = 0;
+ int statuslen = 0;
+ struct scsi_pkt *pkt;
+
+ pkt = icmdp->cmd_un.scsi.pkt;
+ dlength = n2h24(issrhp->dlength);
+
+ /*
+ * Process iSCSI Cmd Response Status
+ * RFC 3720 Section 10.4.2.
+ */
+ switch (issrhp->cmd_status & STATUS_MASK) {
+ case STATUS_GOOD:
+ /* pass SCSI status up stack */
+ if (pkt->pkt_scbp) {
+ pkt->pkt_scbp[0] = issrhp->cmd_status;
+ }
+ break;
+ case STATUS_CHECK:
+ /*
+ * Verify we received a sense buffer and
+ * that there is the correct amount of
+ * request sense space to copy it to.
+ */
+ if ((dlength > 1) &&
+ (pkt->pkt_scbp != NULL) &&
+ (icmdp->cmd_un.scsi.statuslen >=
+ sizeof (struct scsi_arq_status))) {
+ /*
+ * If a bad command status is received we
+ * need to reset the pkt_resid to zero.
+ * The target driver compares its value
+ * before checking other error flags.
+ * (ex. check conditions)
+ */
+ pkt->pkt_resid = 0;
+
+ /* get sense length from first 2 bytes */
+ senselen = ((data[0] << 8) | data[1]) &
+ (size_t)0xFFFF;
+ ISCSI_IO_LOG(CE_NOTE,
+ "DEBUG: iscsi_rx_cmd_rsp_cmd_status status_check: "
+ "dlen: %d scbp: %p statuslen: %d arq: %d senselen:"
+ " %lu", dlength, (void *)pkt->pkt_scbp,
+ icmdp->cmd_un.scsi.statuslen,
+ (int)sizeof (struct scsi_arq_status),
+ senselen);
+
+ /* Sanity-check on the sense length */
+ if ((senselen + 2) > dlength) {
+ senselen = dlength - 2;
+ }
+
+ /*
+ * If there was a Data Digest error then
+ * the sense data cannot be trusted.
+ */
+ if (icmdp->cmd_crc_error_seen) {
+ senselen = 0;
+ }
+
+ /* automatic request sense */
+ arqstat = (struct scsi_arq_status *)pkt->pkt_scbp;
+
+ /* pass SCSI status up stack */
+ *((uchar_t *)&arqstat->sts_status) = issrhp->cmd_status;
+
+ /*
+ * Set the status for the automatic
+ * request sense command
+ */
+ arqstat->sts_rqpkt_state = (STATE_GOT_BUS |
+ STATE_GOT_TARGET | STATE_SENT_CMD |
+ STATE_XFERRED_DATA | STATE_GOT_STATUS |
+ STATE_ARQ_DONE);
+
+ *((uchar_t *)&arqstat->sts_rqpkt_status) = STATUS_GOOD;
+
+ arqstat->sts_rqpkt_reason = CMD_CMPLT;
+
+ /* sense space is statuslen less the non-sense ARQ fields */
+ statuslen = icmdp->cmd_un.scsi.statuslen -
+ (sizeof (struct scsi_arq_status) -
+ sizeof (arqstat->sts_sensedata));
+ if (senselen == 0) {
+ /* auto request sense failed */
+ arqstat->sts_rqpkt_status.sts_chk = 1;
+ arqstat->sts_rqpkt_resid = statuslen;
+ } else if (senselen < statuslen) {
+ /* auto request sense short */
+ arqstat->sts_rqpkt_resid = statuslen - senselen;
+ } else {
+ /* auto request sense complete */
+ arqstat->sts_rqpkt_resid = 0;
+ }
+ arqstat->sts_rqpkt_statistics = 0;
+ pkt->pkt_state |= STATE_ARQ_DONE;
+
+ if (icmdp->cmd_misc_flags & ISCSI_CMD_MISCFLAG_XARQ) {
+ pkt->pkt_state |= STATE_XARQ_DONE;
+ }
+
+ /* copy auto request sense */
+ dlength = min(senselen, statuslen);
+ if (dlength) {
+ bcopy(&data[2], (uchar_t *)&arqstat->
+ sts_sensedata, dlength);
+ }
+ break;
+ }
+ /* FALLTHRU */
+ case STATUS_BUSY:
+ case STATUS_RESERVATION_CONFLICT:
+ case STATUS_QFULL:
+ case STATUS_ACA_ACTIVE:
+ default:
+ /*
+ * If a bad command status is received we need to
+ * reset the pkt_resid to zero. The target driver
+ * compares its value before checking other error
+ * flags. (ex. check conditions)
+ */
+ ISCSI_IO_LOG(CE_NOTE,
+ "DEBUG: iscsi_rx_cmd_rsp_cmd_status: status: "
+ "%d cmd_status: %d dlen: %u scbp: %p statuslen: %d "
+ "arg_len: %d", issrhp->cmd_status & STATUS_MASK,
+ issrhp->cmd_status, dlength, (void *)pkt->pkt_scbp,
+ icmdp->cmd_un.scsi.statuslen,
+ (int)sizeof (struct scsi_arq_status));
+ pkt->pkt_resid = 0;
+ /* pass SCSI status up stack */
+ if (pkt->pkt_scbp) {
+ pkt->pkt_scbp[0] = issrhp->cmd_status;
+ }
+ }
+}
/*
- * iscsi_rx_process_data_rsp - Processed received data header. Once
- * header is processed we read data off the connection directly into
- * the scsi_pkt to avoid duplicate bcopy of a large amount of data.
- * If this is the final data sequence denoted by the data response
+ * iscsi_rx_process_login_pdu - Process login response PDU. This function
+ * copies the data into the connection context so that the login code can
+ * interpret it.
+ */
+
+idm_status_t
+iscsi_rx_process_login_pdu(idm_conn_t *ic, idm_pdu_t *pdu)
+{
+ iscsi_conn_t *icp;
+
+ icp = ic->ic_handle;
+
+ /*
+ * Copy header and data into connection structure so iscsi_login()
+ * can process it.
+ */
+ mutex_enter(&icp->conn_login_mutex);
+ /*
+ * If conn_login_state != LOGIN_TX then we are not ready to handle
+ * this login response and we should just drop it.
+ */
+ if (icp->conn_login_state == LOGIN_TX) {
+ icp->conn_login_datalen = pdu->isp_datalen;
+ bcopy(pdu->isp_hdr, &icp->conn_login_resp_hdr,
+ sizeof (iscsi_hdr_t));
+ /*
+ * Login code is sloppy with its NULL handling, so zero the whole
+ * buffer first; the copy is capped at conn_login_max_data_length.
+ */
+ bzero(icp->conn_login_data, icp->conn_login_max_data_length);
+ bcopy(pdu->isp_data, icp->conn_login_data,
+ MIN(pdu->isp_datalen, icp->conn_login_max_data_length));
+ iscsi_login_update_state_locked(icp, LOGIN_RX);
+ }
+ mutex_exit(&icp->conn_login_mutex);
+
+ return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iscsi_rx_process_cmd_rsp - Process received scsi command response. This
+ * will contain sense data if the command was not successful. This data needs
+ * to be copied into the scsi_pkt. Otherwise we just complete the IO.
+ */
+static idm_status_t
+iscsi_rx_process_cmd_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
+{
+ iscsi_conn_t *icp = ic->ic_handle;
+ iscsi_sess_t *isp = icp->conn_sess;
+ iscsi_scsi_rsp_hdr_t *issrhp = (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr;
+ uint8_t *data = pdu->isp_data;
+ iscsi_cmd_t *icmdp = NULL;
+ struct scsi_pkt *pkt = NULL;
+ idm_status_t rval;
+ struct buf *bp;
+
+ /* make sure we get status in order */
+ mutex_enter(&icp->conn_queue_active.mutex);
+
+ if ((rval = iscsi_rx_chk(icp, isp, issrhp,
+ &icmdp)) != IDM_STATUS_SUCCESS) {
+ if (icmdp != NULL) {
+ iscsi_task_cleanup(issrhp->opcode, icmdp);
+ }
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (rval);
+ }
+
+ /*
+ * If we are in "idm aborting" state then we shouldn't continue
+ * to process this command. By definition this command is no longer
+ * on the active queue so we shouldn't try to remove it either.
+ */
+ mutex_enter(&icmdp->cmd_mutex);
+ if (icmdp->cmd_state == ISCSI_CMD_STATE_IDM_ABORTING) {
+ mutex_exit(&icmdp->cmd_mutex);
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (IDM_STATUS_SUCCESS);
+ }
+ mutex_exit(&icmdp->cmd_mutex);
+
+ /* Get the IDM buffer and bytes transferred */
+ bp = icmdp->cmd_un.scsi.bp;
+ if (ic->ic_conn_flags & IDM_CONN_USE_SCOREBOARD) {
+ /* Transport tracks bytes transferred so use those counts */
+ if (bp && (bp->b_flags & B_READ)) {
+ icmdp->cmd_un.scsi.data_transferred +=
+ icmdp->cmd_itp->idt_rx_bytes;
+ } else {
+ icmdp->cmd_un.scsi.data_transferred +=
+ icmdp->cmd_itp->idt_tx_bytes;
+ }
+ } else {
+ /*
+ * Some transports cannot track the bytes transferred on
+ * the initiator side (like iSER) so we have to use the
+ * status info. If the response field indicates that
+ * the command actually completed then we will assume
+ * the data_transferred value represents the entire buffer
+ * unless the resid field says otherwise. This is a bit
+ * unintuitive but it's really impossible to know what
+ * has been transferred without detailed consideration
+ * of the SCSI status and sense key and that is outside
+ * the scope of the transport. Instead the target/class driver
+ * can consider these values along with the resid and figure
+ * it out. The data_transferred concept is just belt and
+ * suspenders anyway -- RFC 3720 actually explicitly rejects
+ * scoreboarding ("Initiators SHOULD NOT keep track of the
+ * data transferred to or from the target (scoreboarding)")
+ * perhaps for this very reason.
+ */
+ if (issrhp->response != 0) {
+ icmdp->cmd_un.scsi.data_transferred = 0;
+ } else {
+ icmdp->cmd_un.scsi.data_transferred =
+ (bp == NULL) ? 0 : bp->b_bcount;
+ if (issrhp->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+ icmdp->cmd_un.scsi.data_transferred -=
+ ntohl(issrhp->residual_count);
+ }
+ }
+ }
+
+ ISCSI_CHECK_SCSI_READ(icmdp, issrhp,
+ icmdp->cmd_un.scsi.data_transferred,
+ BP_CHECK_THOROUGH);
+
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: rx_process_cmd_rsp: ic: %p pdu: %p itt:"
+ " %x expcmdsn: %x sess_cmd: %x sess_expcmdsn: %x data_transfered:"
+ " %lu ibp: %p obp: %p", (void *)ic, (void *)pdu, issrhp->itt,
+ issrhp->expcmdsn, isp->sess_cmdsn, isp->sess_expcmdsn,
+ icmdp->cmd_un.scsi.data_transferred,
+ (void *)icmdp->cmd_un.scsi.ibp_ibuf,
+ (void *)icmdp->cmd_un.scsi.ibp_obuf);
+
+ iscsi_task_cleanup(issrhp->opcode, icmdp);
+
+ if (issrhp->response) {
+ /* The target failed the command; bp may be NULL here. */
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: rx_process_cmd_rsp: ic: %p pdu:"
+ " %p response: %d bcount: %lu", (void *)ic, (void *)pdu,
+ issrhp->response, (bp == NULL) ? 0 : bp->b_bcount);
+ pkt = icmdp->cmd_un.scsi.pkt;
+ pkt->pkt_reason = CMD_TRAN_ERR;
+ if (icmdp->cmd_un.scsi.bp) {
+ pkt->pkt_resid = icmdp->cmd_un.scsi.bp->b_bcount;
+ } else {
+ pkt->pkt_resid = 0;
+ }
+ } else {
+ /* success */
+ iscsi_cmd_rsp_chk(icmdp, issrhp);
+ iscsi_cmd_rsp_cmd_status(icmdp, issrhp, data);
+ }
+
+ iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E3, isp);
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (IDM_STATUS_SUCCESS);
+}
+
+static void
+iscsi_data_rsp_pkt(iscsi_cmd_t *icmdp, iscsi_data_rsp_hdr_t *idrhp)
+{
+ struct buf *bp = NULL;
+ size_t data_transferred;
+ struct scsi_pkt *pkt;
+
+ bp = icmdp->cmd_un.scsi.bp;
+ pkt = icmdp->cmd_un.scsi.pkt;
+ data_transferred = icmdp->cmd_un.scsi.data_transferred;
+ /*
+ * The command must be completed now, since we won't get a command
+ * response PDU. The cmd_status and residual_count are
+ * not meaningful unless status_present is set.
+ */
+ pkt->pkt_resid = 0;
+ /* Check the residual count */
+ if (bp && (data_transferred != bp->b_bcount)) {
+ /*
+ * We didn't xfer the expected amount of data -
+ * the residual_count in the header is only valid
+ * if the underflow flag is set.
+ */
+ if (idrhp->flags & ISCSI_FLAG_DATA_UNDERFLOW) {
+ pkt->pkt_resid = ntohl(idrhp->residual_count);
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: iscsi_data_rsp_pkt: "
+ "underflow: itt: %d "
+ "transferred: %lu count: %lu", idrhp->itt,
+ data_transferred, bp->b_bcount);
+ } else {
+ if (bp->b_bcount > data_transferred) {
+ /* Some data fell on the floor somehow */
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: "
+ "iscsi_data_rsp_pkt: data fell: itt: %d "
+ "transferred: %lu count: %lu", idrhp->itt,
+ data_transferred, bp->b_bcount);
+ pkt->pkt_resid =
+ bp->b_bcount - data_transferred;
+ }
+ }
+ }
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_state |= (STATE_XFERRED_DATA | STATE_GOT_STATUS);
+
+ if (((idrhp->cmd_status & STATUS_MASK) != STATUS_GOOD) &&
+ (icmdp->cmd_un.scsi.statuslen >=
+ sizeof (struct scsi_arq_status)) && pkt->pkt_scbp) {
+
+ /*
+ * Not supposed to get exception status here!
+ * We have no request sense data so just do the
+ * best we can
+ */
+ struct scsi_arq_status *arqstat =
+ (struct scsi_arq_status *)pkt->pkt_scbp;
+
+
+ bzero(arqstat, sizeof (struct scsi_arq_status));
+
+ *((uchar_t *)&arqstat->sts_status) =
+ idrhp->cmd_status;
+
+ arqstat->sts_rqpkt_resid =
+ sizeof (struct scsi_extended_sense);
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: iscsi_data_rsp_pkt: "
+ "exception status: itt: %d resid: %d",
+ idrhp->itt, arqstat->sts_rqpkt_resid);
+
+ } else if (pkt->pkt_scbp) {
+ /* just pass along the status we got */
+ pkt->pkt_scbp[0] = idrhp->cmd_status;
+ }
+}
+
+/*
+ * iscsi_rx_process_data_rsp -
+ * This currently processes the final data sequence denoted by the data response
* PDU Status bit being set. We will not receive the SCSI response.
* This bit denotes that the PDU is the successful completion of the
- * command. In this case complete the command. If This bit isn't
- * set we wait for more data or a scsi command response.
+ * command.
*/
-static iscsi_status_t
-iscsi_rx_process_data_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp)
+static idm_status_t
+iscsi_rx_process_data_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_sess_t *isp = NULL;
- iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
- iscsi_cmd_t *icmdp = NULL;
- struct scsi_pkt *pkt = NULL;
- struct buf *bp = NULL;
- uint32_t offset = 0;
- uint32_t dlength = 0;
- char *bcp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
+ iscsi_sess_t *isp = NULL;
+ iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)pdu->isp_hdr;
+ iscsi_cmd_t *icmdp = NULL;
+ struct buf *bp = NULL;
+ iscsi_conn_t *icp = ic->ic_handle;
+ idm_buf_t *ibp;
+ idm_status_t rval;
+
+
+ /* should only call this when the data rsp contains final rsp */
+ ASSERT(idrhp->flags & ISCSI_FLAG_DATA_STATUS);
isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- if (idrhp->flags & ISCSI_FLAG_DATA_STATUS) {
- /* make sure we got status in order */
- if (icp->conn_expstatsn == ntohl(idrhp->statsn)) {
- icp->conn_expstatsn++;
- } else {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error "
- "- received status out of order itt:0x%x "
- "statsn:0x%x expstatsn:0x%x", icp->conn_oid,
- idrhp->itt, ntohl(idrhp->statsn),
- icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+
+ mutex_enter(&icp->conn_queue_active.mutex);
+ if ((rval = iscsi_rx_chk(icp, isp, (iscsi_scsi_rsp_hdr_t *)idrhp,
+ &icmdp)) != IDM_STATUS_SUCCESS) {
+ if (icmdp != NULL) {
+ iscsi_task_cleanup(idrhp->opcode, icmdp);
}
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (rval);
}
- /* match itt in the session's command table */
- mutex_enter(&icp->conn_queue_active.mutex);
- mutex_enter(&isp->sess_cmdsn_mutex);
- if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(isp, ihp, &icmdp))) {
- mutex_exit(&isp->sess_cmdsn_mutex);
+ /*
+ * If we are in "idm aborting" state then we shouldn't continue
+ * to process this command. By definition this command is no longer
+ * on the active queue so we shouldn't try to remove it either.
+ */
+ mutex_enter(&icmdp->cmd_mutex);
+ if (icmdp->cmd_state == ISCSI_CMD_STATE_IDM_ABORTING) {
+ mutex_exit(&icmdp->cmd_mutex);
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_SUCCESS);
}
+ mutex_exit(&icmdp->cmd_mutex);
+
/*
* Holding the pending/active queue locks across the
* iscsi_rx_data call later in this function may cause
@@ -505,17 +861,10 @@
*/
iscsi_dequeue_active_cmd(icp, icmdp);
- /* update expcmdsn and maxcmdsn */
- iscsi_update_flow_control(isp, ntohl(idrhp->maxcmdsn),
- ntohl(idrhp->expcmdsn));
- mutex_exit(&isp->sess_cmdsn_mutex);
mutex_exit(&icp->conn_queue_active.mutex);
/* shorthand some values */
- pkt = icmdp->cmd_un.scsi.pkt;
bp = icmdp->cmd_un.scsi.bp;
- offset = ntohl(idrhp->offset);
- dlength = n2h24(idrhp->dlength);
/*
* some poorly behaved targets have been observed
@@ -531,464 +880,95 @@
mutex_enter(&icp->conn_queue_active.mutex);
iscsi_enqueue_active_cmd(icp, icmdp);
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
- }
-
- /*
- * We can't tolerate the target sending too much
- * data for our buffer
- */
- if ((dlength >
- (bp->b_bcount - icmdp->cmd_un.scsi.data_transferred)) ||
- (dlength > (bp->b_bcount - offset))) {
- cmn_err(CE_WARN,
- "iscsi connection(%u) protocol error - "
- "received too much data itt:0x%x",
- icp->conn_oid, idrhp->itt);
- mutex_enter(&icp->conn_queue_active.mutex);
- iscsi_enqueue_active_cmd(icp, icmdp);
- mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
-
- bcp = ((char *)bp->b_un.b_addr) + offset;
-
- /*
- * Get the rest of the data and copy it directly into
- * the scsi_pkt.
- */
- rval = iscsi_net->recvdata(icp->conn_socket, ihp,
- bcp, dlength, 0, (icp->conn_params.data_digest ?
- ISCSI_NET_DATA_DIGEST : 0));
- if (ISCSI_SUCCESS(rval)) {
- KSTAT_ADD_CONN_RX_BYTES(icp, dlength);
- } else {
- /* If digest error flag icmdp with a crc error */
- if (rval == ISCSI_STATUS_DATA_DIGEST_ERROR) {
- icmdp->cmd_crc_error_seen = B_TRUE;
- }
- mutex_enter(&icp->conn_queue_active.mutex);
- iscsi_enqueue_active_cmd(icp, icmdp);
- mutex_exit(&icp->conn_queue_active.mutex);
- return (rval);
- }
- isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
-
- /* update icmdp statistics */
- icmdp->cmd_un.scsi.data_transferred += dlength;
}
- /*
- * We got status. This should only happen if we have
- * received all the data with no errors. The command
- * must be completed now, since we won't get a command
- * response PDU. The cmd_status and residual_count are
- * not meaningful unless status_present is set.
- */
- if (idrhp->flags & ISCSI_FLAG_DATA_STATUS) {
- pkt->pkt_resid = 0;
- /* Check the residual count */
- if (bp &&
- (icmdp->cmd_un.scsi.data_transferred !=
- bp->b_bcount)) {
- /*
- * We didn't xfer the expected amount of data -
- * the residual_count in the header is only valid
- * if the underflow flag is set.
- */
- if (idrhp->flags & ISCSI_FLAG_DATA_UNDERFLOW) {
- pkt->pkt_resid = ntohl(idrhp->residual_count);
- } else {
- if (bp->b_bcount >
- icmdp->cmd_un.scsi.data_transferred) {
- /* Some data fell on the floor somehw */
- pkt->pkt_resid =
- bp->b_bcount -
- icmdp->cmd_un.scsi.data_transferred;
- }
- }
- }
-
- pkt->pkt_reason = CMD_CMPLT;
- pkt->pkt_state |= (STATE_XFERRED_DATA | STATE_GOT_STATUS);
-
- if (((idrhp->cmd_status & STATUS_MASK) != STATUS_GOOD) &&
- (icmdp->cmd_un.scsi.statuslen >=
- sizeof (struct scsi_arq_status)) && pkt->pkt_scbp) {
-
- /*
- * Not supposed to get exception status here!
- * We have no request sense data so just do the
- * best we can
- */
- struct scsi_arq_status *arqstat =
- (struct scsi_arq_status *)pkt->pkt_scbp;
-
-
- bzero(arqstat, sizeof (struct scsi_arq_status));
-
- *((uchar_t *)&arqstat->sts_status) =
- idrhp->cmd_status;
-
- arqstat->sts_rqpkt_resid =
- sizeof (struct scsi_extended_sense);
-
- } else if (pkt->pkt_scbp) {
- /* just pass along the status we got */
- pkt->pkt_scbp[0] = idrhp->cmd_status;
- }
-
- mutex_enter(&icp->conn_queue_active.mutex);
- iscsi_enqueue_active_cmd(icp, icmdp);
- iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E3, isp);
- mutex_exit(&icp->conn_queue_active.mutex);
- } else {
+ ibp = icmdp->cmd_un.scsi.ibp_ibuf;
+ if (ibp == NULL) {
+ /*
+ * After the check of bp above we *should* have a corresponding
+ * idm_buf_t (ibp). It's possible that the original call
+ * to idm_buf_alloc failed due to a pending connection state
+ * transition in which case this value can be NULL. It's
+ * highly unlikely that the connection would be shutting down
+ * *and* we manage to process a data response and get to this
+ * point in the code but just in case we should check for it.
+ * This isn't really a protocol error -- we are almost certainly
+ * closing the connection anyway so just return a generic error.
+ */
mutex_enter(&icp->conn_queue_active.mutex);
iscsi_enqueue_active_cmd(icp, icmdp);
mutex_exit(&icp->conn_queue_active.mutex);
+ return (IDM_STATUS_FAIL);
}
- return (ISCSI_STATUS_SUCCESS);
-}
-
-
-/*
- * iscsi_rx_process_cmd_rsp - Process received scsi command response. This
- * will contain sense data if the command was not successful. This data needs
- * to be copied into the scsi_pkt. Otherwise we just complete the IO.
- */
-static iscsi_status_t
-iscsi_rx_process_cmd_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
-{
- iscsi_sess_t *isp = icp->conn_sess;
- iscsi_scsi_rsp_hdr_t *issrhp = (iscsi_scsi_rsp_hdr_t *)ihp;
- iscsi_cmd_t *icmdp = NULL;
- struct scsi_pkt *pkt = NULL;
- uint32_t dlength = 0;
- struct scsi_arq_status *arqstat = NULL;
- size_t senselen = 0;
- int statuslen = 0;
-
- /* make sure we get status in order */
- if (icp->conn_expstatsn == ntohl(issrhp->statsn)) {
- icp->conn_expstatsn++;
+ if (ic->ic_conn_flags & IDM_CONN_USE_SCOREBOARD) {
+ icmdp->cmd_un.scsi.data_transferred =
+ icmdp->cmd_itp->idt_rx_bytes;
} else {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, issrhp->itt,
- ntohl(issrhp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ icmdp->cmd_un.scsi.data_transferred = bp->b_bcount;
+ if (idrhp->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+ icmdp->cmd_un.scsi.data_transferred -=
+ ntohl(idrhp->residual_count);
+ }
}
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: rx_process_data_rsp: icp: %p pdu: %p "
+ "itt: %d ibp: %p icmdp: %p xfer_len: %lu transferred: %lu dlen: %u",
+ (void *)icp, (void *)pdu, idrhp->itt, (void *)bp, (void *)icmdp,
+ (ibp == NULL) ? 0 : ibp->idb_xfer_len,
+ icmdp->cmd_un.scsi.data_transferred,
+ n2h24(idrhp->dlength));
+
+ iscsi_task_cleanup(idrhp->opcode, icmdp);
+
+ iscsi_data_rsp_pkt(icmdp, idrhp);
+
mutex_enter(&icp->conn_queue_active.mutex);
- mutex_enter(&isp->sess_cmdsn_mutex);
- if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(isp, ihp, &icmdp))) {
- mutex_exit(&isp->sess_cmdsn_mutex);
- mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
- }
-
- /* update expcmdsn and maxcmdsn */
- iscsi_update_flow_control(isp, ntohl(issrhp->maxcmdsn),
- ntohl(issrhp->expcmdsn));
- mutex_exit(&isp->sess_cmdsn_mutex);
-
- pkt = icmdp->cmd_un.scsi.pkt;
-
- if (issrhp->response) {
- /* The target failed the command. */
- pkt->pkt_reason = CMD_TRAN_ERR;
- if (icmdp->cmd_un.scsi.bp) {
- pkt->pkt_resid = icmdp->cmd_un.scsi.bp->b_bcount;
- } else {
- pkt->pkt_resid = 0;
- }
- } else {
- /* success */
- pkt->pkt_resid = 0;
- /* Check the residual count */
- if ((icmdp->cmd_un.scsi.bp) &&
- (icmdp->cmd_un.scsi.data_transferred !=
- icmdp->cmd_un.scsi.bp->b_bcount)) {
- /*
- * We didn't xfer the expected amount of data -
- * the residual_count in the header is only
- * valid if the underflow flag is set.
- */
- if (issrhp->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
- pkt->pkt_resid = ntohl(issrhp->residual_count);
- } else {
- if (icmdp->cmd_un.scsi.bp->b_bcount >
- icmdp->cmd_un.scsi.data_transferred) {
- /*
- * Some data fell on the floor
- * somehow - probably a CRC error
- */
- pkt->pkt_resid =
- icmdp->cmd_un.scsi.bp->b_bcount -
- icmdp->cmd_un.scsi.data_transferred;
- }
- }
- }
-
- /* set flags that tell SCSA that the command is complete */
- if (icmdp->cmd_crc_error_seen == B_FALSE) {
- /* Set successful completion */
- pkt->pkt_reason = CMD_CMPLT;
- if (icmdp->cmd_un.scsi.bp) {
- pkt->pkt_state |= (STATE_XFERRED_DATA |
- STATE_GOT_STATUS);
- } else {
- pkt->pkt_state |= STATE_GOT_STATUS;
- }
- } else {
- /*
- * Some of the data was found to have an incorrect
- * error at the protocol error.
- */
- pkt->pkt_reason = CMD_PER_FAIL;
- pkt->pkt_statistics |= STAT_PERR;
- if (icmdp->cmd_un.scsi.bp) {
- pkt->pkt_resid =
- icmdp->cmd_un.scsi.bp->b_bcount;
- } else {
- pkt->pkt_resid = 0;
- }
- }
-
- dlength = n2h24(issrhp->dlength);
-
- /*
- * Process iSCSI Cmd Response Status
- * RFC 3720 Sectionn 10.4.2.
- */
- switch (issrhp->cmd_status & STATUS_MASK) {
- case STATUS_GOOD:
- /* pass SCSI status up stack */
- if (pkt->pkt_scbp) {
- pkt->pkt_scbp[0] = issrhp->cmd_status;
- }
- break;
- case STATUS_CHECK:
- /*
- * Verify we received a sense buffer and
- * that there is the correct amount of
- * request sense space to copy it to.
- */
- if ((dlength > 1) &&
- (pkt->pkt_scbp != NULL) &&
- (icmdp->cmd_un.scsi.statuslen >=
- sizeof (struct scsi_arq_status))) {
- /*
- * If a bad command status is received we
- * need to reset the pkt_resid to zero.
- * The target driver compares its value
- * before checking other error flags.
- * (ex. check conditions)
- */
- pkt->pkt_resid = 0;
-
- /* get sense length from first 2 bytes */
- senselen = ((data[0] << 8) | data[1]) &
- (size_t)0xFFFF;
-
- /* Sanity-check on the sense length */
- if ((senselen + 2) > dlength) {
- senselen = dlength - 2;
- }
-
- /*
- * If there was a Data Digest error then
- * the sense data cannot be trusted.
- */
- if (icmdp->cmd_crc_error_seen) {
- senselen = 0;
- }
-
- /* automatic request sense */
- arqstat =
- (struct scsi_arq_status *)pkt->pkt_scbp;
-
- /* pass SCSI status up stack */
- *((uchar_t *)&arqstat->sts_status) =
- issrhp->cmd_status;
-
- /*
- * Set the status for the automatic
- * request sense command
- */
- arqstat->sts_rqpkt_state = (STATE_GOT_BUS |
- STATE_GOT_TARGET | STATE_SENT_CMD |
- STATE_XFERRED_DATA | STATE_GOT_STATUS |
- STATE_ARQ_DONE);
-
- *((uchar_t *)&arqstat->sts_rqpkt_status) =
- STATUS_GOOD;
-
- arqstat->sts_rqpkt_reason = CMD_CMPLT;
-
- statuslen = icmdp->cmd_un.scsi.statuslen;
-
- if (senselen == 0) {
- /* auto request sense failed */
- arqstat->sts_rqpkt_status.sts_chk = 1;
- arqstat->sts_rqpkt_resid =
- statuslen;
- } else if (senselen <
- statuslen) {
- /* auto request sense short */
- arqstat->sts_rqpkt_resid =
- statuslen
- - senselen;
- } else {
- /* auto request sense complete */
- arqstat->sts_rqpkt_resid = 0;
- }
- arqstat->sts_rqpkt_statistics = 0;
- pkt->pkt_state |= STATE_ARQ_DONE;
-
- if (icmdp->cmd_misc_flags &
- ISCSI_CMD_MISCFLAG_XARQ) {
- pkt->pkt_state |= STATE_XARQ_DONE;
- }
-
- /* copy auto request sense */
- dlength = min(senselen,
- statuslen);
- if (dlength) {
- bcopy(&data[2], (uchar_t *)&arqstat->
- sts_sensedata, dlength);
- }
- break;
- }
- /* FALLTHRU */
- case STATUS_BUSY:
- case STATUS_RESERVATION_CONFLICT:
- case STATUS_QFULL:
- case STATUS_ACA_ACTIVE:
- default:
- /*
- * If a bad command status is received we need to
- * reset the pkt_resid to zero. The target driver
- * compares its value before checking other error
- * flags. (ex. check conditions)
- */
- pkt->pkt_resid = 0;
- /* pass SCSI status up stack */
- if (pkt->pkt_scbp) {
- pkt->pkt_scbp[0] = issrhp->cmd_status;
- }
- }
- }
-
+ iscsi_enqueue_active_cmd(icp, icmdp);
iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E3, isp);
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_SUCCESS);
+ return (IDM_STATUS_SUCCESS);
}
/*
- * iscsi_rx_process_rtt_rsp - Process received RTT. This means the target is
- * requesting data.
- */
-/* ARGSUSED */
-static iscsi_status_t
-iscsi_rx_process_rtt_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
-{
- iscsi_sess_t *isp = (iscsi_sess_t *)icp->conn_sess;
- iscsi_rtt_hdr_t *irhp = (iscsi_rtt_hdr_t *)ihp;
- iscsi_cmd_t *icmdp = NULL;
- struct buf *bp = NULL;
- uint32_t data_length;
- iscsi_status_t status = ISCSI_STATUS_PROTOCOL_ERROR;
-
-
- mutex_enter(&isp->sess_queue_pending.mutex);
- mutex_enter(&icp->conn_queue_active.mutex);
- mutex_enter(&isp->sess_cmdsn_mutex);
- if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(isp, ihp, &icmdp))) {
- mutex_exit(&isp->sess_cmdsn_mutex);
- mutex_exit(&icp->conn_queue_active.mutex);
- mutex_exit(&isp->sess_queue_pending.mutex);
- return (status);
- }
-
- /* update expcmdsn and maxcmdsn */
- iscsi_update_flow_control(isp, ntohl(irhp->maxcmdsn),
- ntohl(irhp->expcmdsn));
- mutex_enter(&icmdp->cmd_mutex);
- mutex_exit(&isp->sess_cmdsn_mutex);
-
- bp = icmdp->cmd_un.scsi.bp;
- data_length = ntohl(irhp->data_length);
-
- /*
- * Perform boundary-checks per RFC 3720 (section 10.8.4).
- * The Desired Data Transfer Length must satisfy this relation:
- *
- * 0 < Desired Data Transfer Length <= MaxBurstLength
- */
- if ((bp == NULL) || (data_length == 0)) {
- cmn_err(CE_WARN, "iscsi connection(%u) received r2t but pkt "
- "has no data itt:0x%x - protocol error", icp->conn_oid,
- irhp->itt);
- } else if (data_length > icp->conn_params.max_burst_length) {
- cmn_err(CE_WARN, "iscsi connection(%u) received r2t but pkt "
- "is larger than MaxBurstLength itt:0x%x len:0x%x - "
- "protocol error",
- icp->conn_oid, irhp->itt, data_length);
- } else {
- iscsi_handle_r2t(icp, icmdp, ntohl(irhp->data_offset),
- data_length, irhp->ttt);
- status = ISCSI_STATUS_SUCCESS;
- }
-
- mutex_exit(&icmdp->cmd_mutex);
- mutex_exit(&icp->conn_queue_active.mutex);
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- return (status);
-}
-
-
-/*
* iscsi_rx_process_nop - Process a received nop. If nop is in response
* to a ping we sent update stats. If initiated by the target we need
* to response back to the target with a nop. Schedule the response.
*/
/* ARGSUSED */
-static iscsi_status_t
-iscsi_rx_process_nop(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
+static idm_status_t
+iscsi_rx_process_nop(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_sess_t *isp = NULL;
- iscsi_nop_in_hdr_t *inihp = (iscsi_nop_in_hdr_t *)ihp;
+ iscsi_nop_in_hdr_t *inihp = (iscsi_nop_in_hdr_t *)pdu->isp_hdr;
iscsi_cmd_t *icmdp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
- /* ASSERT(data != NULL) data is allowed to be NULL */
+ iscsi_conn_t *icp = ic->ic_handle;
+
+ if (icp->conn_expstatsn != ntohl(inihp->statsn)) {
+ cmn_err(CE_WARN, "iscsi connection(%u/%x) protocol error - "
+ "received status out of order itt:0x%x statsn:0x%x "
+ "expstatsn:0x%x", icp->conn_oid, inihp->opcode, inihp->itt,
+ ntohl(inihp->statsn), icp->conn_expstatsn);
+ return (IDM_STATUS_PROTOCOL_ERROR);
+ }
isp = icp->conn_sess;
ASSERT(isp != NULL);
-
- if (icp->conn_expstatsn != ntohl(inihp->statsn)) {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, inihp->itt,
- ntohl(inihp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
- }
-
mutex_enter(&isp->sess_queue_pending.mutex);
mutex_enter(&icp->conn_queue_active.mutex);
mutex_enter(&isp->sess_cmdsn_mutex);
if (inihp->itt != ISCSI_RSVD_TASK_TAG) {
if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(
- isp, ihp, &icmdp))) {
+ isp, (iscsi_hdr_t *)inihp, &icmdp))) {
+ cmn_err(CE_WARN, "iscsi connection(%u) protocol error "
+ "- can not find cmd for itt:0x%x",
+ icp->conn_oid, inihp->itt);
mutex_exit(&isp->sess_cmdsn_mutex);
mutex_exit(&icp->conn_queue_active.mutex);
mutex_exit(&isp->sess_queue_pending.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
}
@@ -1022,48 +1002,41 @@
mutex_exit(&icp->conn_queue_active.mutex);
mutex_exit(&isp->sess_queue_pending.mutex);
- return (rval);
+ return (IDM_STATUS_SUCCESS);
}
/*
* iscsi_rx_process_reject_rsp - The server rejected a PDU
*/
-static iscsi_status_t
-iscsi_rx_process_reject_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, char *data)
+static idm_status_t
+iscsi_rx_process_reject_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_reject_rsp_hdr_t *irrhp = (iscsi_reject_rsp_hdr_t *)ihp;
- iscsi_sess_t *isp = NULL;
- uint32_t dlength = 0;
- iscsi_hdr_t *old_ihp = NULL;
-
- ASSERT(icp != NULL);
- isp = icp->conn_sess;
- ASSERT(ihp != NULL);
+ iscsi_reject_rsp_hdr_t *irrhp = (iscsi_reject_rsp_hdr_t *)pdu->isp_hdr;
+ iscsi_sess_t *isp = NULL;
+ uint32_t dlength = 0;
+ iscsi_hdr_t *old_ihp = NULL;
+ iscsi_conn_t *icp = ic->ic_handle;
+ uint8_t *data = pdu->isp_data;
+ iscsi_hdr_t *ihp = (iscsi_hdr_t *)irrhp;
+ idm_status_t status;
+ iscsi_cmd_t *icmdp = NULL;
+
ASSERT(data != NULL);
-
- /* make sure we only Ack Status numbers that we've actually received. */
- if (icp->conn_expstatsn == ntohl(irrhp->statsn)) {
- icp->conn_expstatsn++;
- } else {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, ihp->itt,
- ntohl(irrhp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ isp = icp->conn_sess;
+ ASSERT(isp != NULL);
+
+ mutex_enter(&icp->conn_queue_active.mutex);
+ if ((status = iscsi_rx_chk(icp, isp, (iscsi_scsi_rsp_hdr_t *)irrhp,
+ &icmdp)) != IDM_STATUS_SUCCESS) {
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (status);
}
- /* update expcmdsn and maxcmdsn */
- mutex_enter(&isp->sess_cmdsn_mutex);
- iscsi_update_flow_control(isp, ntohl(irrhp->maxcmdsn),
- ntohl(irrhp->expcmdsn));
- mutex_exit(&isp->sess_cmdsn_mutex);
-
/* If we don't have the rejected header we can't do anything */
dlength = n2h24(irrhp->dlength);
if (dlength < sizeof (iscsi_hdr_t)) {
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
/* map old ihp */
@@ -1083,7 +1056,7 @@
*/
if (!(old_ihp->opcode & ISCSI_OP_IMMEDIATE)) {
/* Rejecting IMM but old old_hdr wasn't IMM */
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
/*
@@ -1099,15 +1072,15 @@
*/
break;
case ISCSI_OP_SCSI_TASK_MGT_MSG:
- (void) iscsi_rx_process_rejected_tsk_mgt(icp,
- old_ihp);
+ (void) iscsi_rx_process_rejected_tsk_mgt(ic, old_ihp);
break;
default:
cmn_err(CE_WARN, "iscsi connection(%u) protocol error "
"- received a reject for a command(0x%02x) not "
"sent as an immediate", icp->conn_oid,
old_ihp->opcode);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ break;
}
break;
@@ -1130,27 +1103,28 @@
cmn_err(CE_WARN, "iscsi connection(%u) closing connection - "
"target requested itt:0x%x reason:0x%x",
icp->conn_oid, ihp->itt, irrhp->reason);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ break;
}
- return (ISCSI_STATUS_SUCCESS);
+ return (status);
}
/*
* iscsi_rx_process_rejected_tsk_mgt -
*/
-static iscsi_status_t
-iscsi_rx_process_rejected_tsk_mgt(iscsi_conn_t *icp,
- iscsi_hdr_t *old_ihp)
+/* ARGSUSED */
+static idm_status_t
+iscsi_rx_process_rejected_tsk_mgt(idm_conn_t *ic, iscsi_hdr_t *old_ihp)
{
- iscsi_sess_t *isp = NULL;
- iscsi_cmd_t *icmdp = NULL;
-
- ASSERT(icp != NULL);
+ iscsi_sess_t *isp = NULL;
+ iscsi_cmd_t *icmdp = NULL;
+ iscsi_conn_t *icp = ic->ic_handle;
+
isp = icp->conn_sess;
ASSERT(old_ihp != NULL);
- ASSERT(icp->conn_sess != NULL);
+ ASSERT(isp != NULL);
mutex_enter(&icp->conn_queue_active.mutex);
mutex_enter(&isp->sess_cmdsn_mutex);
@@ -1158,7 +1132,7 @@
isp, old_ihp, &icmdp))) {
mutex_exit(&isp->sess_cmdsn_mutex);
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
mutex_exit(&isp->sess_cmdsn_mutex);
@@ -1175,7 +1149,7 @@
}
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_SUCCESS);
+ return (IDM_STATUS_SUCCESS);
}
@@ -1183,44 +1157,25 @@
* iscsi_rx_process_task_mgt_rsp -
*/
/* ARGSUSED */
-static iscsi_status_t
-iscsi_rx_process_task_mgt_rsp(iscsi_conn_t *icp,
- iscsi_hdr_t *ihp, void *data)
+static idm_status_t
+iscsi_rx_process_task_mgt_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_sess_t *isp = NULL;
iscsi_scsi_task_mgt_rsp_hdr_t *istmrhp = NULL;
iscsi_cmd_t *icmdp = NULL;
-
- ASSERT(ihp != NULL);
- ASSERT(icp != NULL);
+ iscsi_conn_t *icp = ic->ic_handle;
+ idm_status_t status = IDM_STATUS_SUCCESS;
+
isp = icp->conn_sess;
- ASSERT(isp != NULL);
- istmrhp = (iscsi_scsi_task_mgt_rsp_hdr_t *)ihp;
-
- if (icp->conn_expstatsn == ntohl(istmrhp->statsn)) {
- icp->conn_expstatsn++;
- } else {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, istmrhp->itt,
- ntohl(istmrhp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ istmrhp = (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr;
+
+ mutex_enter(&icp->conn_queue_active.mutex);
+ if ((status = iscsi_rx_chk(icp, isp, (iscsi_scsi_rsp_hdr_t *)istmrhp,
+ &icmdp)) != IDM_STATUS_SUCCESS) {
+ mutex_exit(&icp->conn_queue_active.mutex);
+ return (status);
}
- /* make sure we only Ack Status numbers that we've actually received. */
- mutex_enter(&icp->conn_queue_active.mutex);
- mutex_enter(&isp->sess_cmdsn_mutex);
- if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(isp, ihp, &icmdp))) {
- mutex_exit(&isp->sess_cmdsn_mutex);
- mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
- }
-
- /* update expcmdsn and maxcmdn */
- iscsi_update_flow_control(isp, ntohl(istmrhp->maxcmdsn),
- ntohl(istmrhp->expcmdsn));
- mutex_exit(&isp->sess_cmdsn_mutex);
-
switch (icmdp->cmd_type) {
case ISCSI_CMD_TYPE_ABORT:
case ISCSI_CMD_TYPE_RESET:
@@ -1258,8 +1213,7 @@
* the connection to try and recover
* to a known state.
*/
- mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ status = IDM_STATUS_PROTOCOL_ERROR;
}
break;
@@ -1268,49 +1222,48 @@
"received a task mgt response for a non-task mgt "
"cmd itt:0x%x type:%d", icp->conn_oid, istmrhp->itt,
icmdp->cmd_type);
- mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ break;
}
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_SUCCESS);
+ return (status);
}
/*
- * iscsi_rx_process_logout -
+ * iscsi_rx_process_logout_rsp -
*
*/
/* ARGSUSED */
-static iscsi_status_t
-iscsi_rx_process_logout_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
+idm_status_t
+iscsi_rx_process_logout_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_sess_t *isp = icp->conn_sess;
- iscsi_logout_rsp_hdr_t *ilrhp = (iscsi_logout_rsp_hdr_t *)ihp;
+ iscsi_conn_t *icp = ic->ic_handle;
+ iscsi_logout_rsp_hdr_t *ilrhp =
+ (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr;
iscsi_cmd_t *icmdp = NULL;
-
- ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
+ iscsi_sess_t *isp;
+ idm_status_t status = IDM_STATUS_SUCCESS;
+
isp = icp->conn_sess;
- ASSERT(isp != NULL);
if (icp->conn_expstatsn != ntohl(ilrhp->statsn)) {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
+ cmn_err(CE_WARN, "iscsi connection(%u/%x) protocol error - "
"received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, ilrhp->itt,
+ "expstatsn:0x%x", icp->conn_oid, ilrhp->opcode, ilrhp->itt,
ntohl(ilrhp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
mutex_enter(&icp->conn_queue_active.mutex);
mutex_enter(&isp->sess_cmdsn_mutex);
if (ilrhp->itt != ISCSI_RSVD_TASK_TAG) {
if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(
- isp, ihp, &icmdp))) {
+ isp, (iscsi_hdr_t *)ilrhp, &icmdp))) {
mutex_exit(&isp->sess_cmdsn_mutex);
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
}
@@ -1319,6 +1272,9 @@
ntohl(ilrhp->expcmdsn));
mutex_exit(&isp->sess_cmdsn_mutex);
+ ISCSI_IO_LOG(CE_NOTE,
+ "DEBUG: iscsi_rx_process_logout_rsp: response: %d",
+ ilrhp->response);
switch (ilrhp->response) {
case ISCSI_LOGOUT_CID_NOT_FOUND:
/*
@@ -1343,42 +1299,48 @@
case ISCSI_LOGOUT_SUCCESS:
iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E3, isp);
mutex_exit(&icp->conn_queue_active.mutex);
- /* logout completed successfully notify the conn */
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T17);
- mutex_exit(&icp->conn_state_mutex);
+ iscsi_drop_conn_cleanup(icp);
break;
default:
mutex_exit(&icp->conn_queue_active.mutex);
- rval = ISCSI_STATUS_PROTOCOL_ERROR;
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ break;
+
}
-
- return (rval);
+ return (status);
}
-
/*
- * iscsi_rx_process_logout -
+ * iscsi_rx_process_async_rsp
*
*/
/* ARGSUSED */
-static iscsi_status_t
-iscsi_rx_process_async_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
+static idm_status_t
+iscsi_rx_process_async_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_async_evt_hdr_t *iaehp = (iscsi_async_evt_hdr_t *)ihp;
+ iscsi_conn_t *icp = ic->ic_handle;
+ iscsi_sess_t *isp = icp->conn_sess;
+ idm_status_t rval = IDM_STATUS_SUCCESS;
+ iscsi_task_t *itp;
+ iscsi_async_evt_hdr_t *iaehp =
+ (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
- ASSERT(icp->conn_sess != NULL);
-
- if (icp->conn_expstatsn != ntohl(iaehp->statsn)) {
+ ASSERT(pdu != NULL);
+ ASSERT(isp != NULL);
+
+ mutex_enter(&isp->sess_cmdsn_mutex);
+ if (icp->conn_expstatsn == ntohl(iaehp->statsn)) {
+ icp->conn_expstatsn++;
+ } else {
cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, ihp->itt,
+ "received status out of order statsn:0x%x "
+ "expstatsn:0x%x", icp->conn_oid,
ntohl(iaehp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ mutex_exit(&isp->sess_cmdsn_mutex);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
+ mutex_exit(&isp->sess_cmdsn_mutex);
switch (iaehp->async_event) {
case ISCSI_ASYNC_EVENT_SCSI_EVENT:
@@ -1402,16 +1364,35 @@
* action to these events of dis/reconnecting.
* Once reconnected we perform a reenumeration.
*/
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
+ idm_ini_conn_disconnect(ic);
break;
case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
- /* Target has requested this connection to logout. */
+ /*
+ * We've been asked to logout by the target --
+ * we need to treat this differently from a normal logout
+ * due to a discovery failure. Normal logouts result in
+ * an N3 event to the session state machine and an offline
+ * of the lun. In this case we want to put the connection
+ * into "failed" state and generate N5 to the session state
+ * machine since the initiator logged out at the target's
+ * request. To track this we set a flag indicating we
+ * received this async logout request from the target
+ */
mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
+ icp->conn_async_logout = B_TRUE;
mutex_exit(&icp->conn_state_mutex);
+
+ /* Target has requested this connection to logout. */
+ itp = kmem_zalloc(sizeof (iscsi_task_t), KM_SLEEP);
+ itp->t_arg = icp;
+ itp->t_blocking = B_FALSE;
+ if (ddi_taskq_dispatch(isp->sess_taskq,
+ (void(*)())iscsi_logout_start, itp, DDI_SLEEP) !=
+ DDI_SUCCESS) {
+ /* Disconnect if we couldn't dispatch the task */
+ idm_ini_conn_disconnect(ic);
+ }
break;
case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
@@ -1427,10 +1408,9 @@
* we need to check the CID and drop that
* specific connection.
*/
- iscsi_conn_set_login_min_max(icp, iaehp->param2, iaehp->param3);
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
+ iscsi_conn_set_login_min_max(icp, iaehp->param2,
+ iaehp->param3);
+ idm_ini_conn_disconnect(ic);
break;
case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
@@ -1445,10 +1425,9 @@
* then we need to drop all connections on the
* session.
*/
- iscsi_conn_set_login_min_max(icp, iaehp->param2, iaehp->param3);
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
+ iscsi_conn_set_login_min_max(icp, iaehp->param2,
+ iaehp->param3);
+ idm_ini_conn_disconnect(ic);
break;
case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
@@ -1460,9 +1439,15 @@
* now we will request a logout. We can't
* just ignore this or it might force corruption?
*/
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
+ itp = kmem_zalloc(sizeof (iscsi_task_t), KM_SLEEP);
+ itp->t_arg = icp;
+ itp->t_blocking = B_FALSE;
+ if (ddi_taskq_dispatch(isp->sess_taskq,
+ (void(*)())iscsi_logout_start, itp, DDI_SLEEP) !=
+ DDI_SUCCESS) {
+ /* Disconnect if we couldn't dispatch the task */
+ idm_ini_conn_disconnect(ic);
+ }
break;
case ISCSI_ASYNC_EVENT_VENDOR_SPECIFIC:
@@ -1471,12 +1456,10 @@
* specific async events. So just ignore
* the request.
*/
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T14);
- mutex_exit(&icp->conn_state_mutex);
+ idm_ini_conn_disconnect(ic);
break;
default:
- rval = ISCSI_STATUS_PROTOCOL_ERROR;
+ rval = IDM_STATUS_PROTOCOL_ERROR;
}
return (rval);
@@ -1488,51 +1471,34 @@
* status value instead of returning the status value. The return value
* is SUCCESS in order to let iscsi_handle_text control the operation of
* a text request.
- * Test requests are a handled a little different than other types of
+ * Text requests are handled a little differently than other types of
* iSCSI commands because the initiator sends additional empty text requests
* in order to obtain the remaining responses required to complete the
* request. iscsi_handle_text controls the operation of text request, while
* iscsi_rx_process_text_rsp just process the current response.
*/
-static iscsi_status_t
-iscsi_rx_process_text_rsp(iscsi_conn_t *icp, iscsi_hdr_t *ihp, char *data)
+static idm_status_t
+iscsi_rx_process_text_rsp(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_sess_t *isp = NULL;
- iscsi_text_rsp_hdr_t *ithp = (iscsi_text_rsp_hdr_t *)ihp;
+ iscsi_text_rsp_hdr_t *ithp =
+ (iscsi_text_rsp_hdr_t *)pdu->isp_hdr;
+ iscsi_conn_t *icp = ic->ic_handle;
iscsi_cmd_t *icmdp = NULL;
boolean_t final = B_FALSE;
uint32_t data_len;
-
- ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
- ASSERT(data != NULL);
+ uint8_t *data = pdu->isp_data;
+ idm_status_t rval;
isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- if (icp->conn_expstatsn == ntohl(ithp->statsn)) {
- icp->conn_expstatsn++;
- } else {
- cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
- "received status out of order itt:0x%x statsn:0x%x "
- "expstatsn:0x%x", icp->conn_oid, ithp->itt,
- ntohl(ithp->statsn), icp->conn_expstatsn);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
- }
mutex_enter(&icp->conn_queue_active.mutex);
- mutex_enter(&isp->sess_cmdsn_mutex);
- if (!ISCSI_SUCCESS(iscsi_rx_process_itt_to_icmdp(isp, ihp, &icmdp))) {
- mutex_exit(&isp->sess_cmdsn_mutex);
+ if ((rval = iscsi_rx_chk(icp, isp, (iscsi_scsi_rsp_hdr_t *)ithp,
+ &icmdp)) != IDM_STATUS_SUCCESS) {
mutex_exit(&icp->conn_queue_active.mutex);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (rval);
}
- /* update expcmdsn and maxcmdsn */
- iscsi_update_flow_control(isp, ntohl(ithp->maxcmdsn),
- ntohl(ithp->expcmdsn));
- mutex_exit(&isp->sess_cmdsn_mutex);
-
/* update local final response flag */
if (ithp->flags & ISCSI_FLAG_FINAL) {
final = B_TRUE;
@@ -1553,7 +1519,7 @@
cmn_err(CE_WARN, "iscsi connection(%u) protocol error - "
"received text response with invalid flags:0x%x or "
"ttt:0x%x", icp->conn_oid, ithp->flags, ithp->itt);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
if ((icmdp->cmd_un.text.stage == ISCSI_CMD_TEXT_INITIAL_REQ) &&
@@ -1566,7 +1532,7 @@
cmn_err(CE_WARN, "iscsi connection(%u) protocol "
"error - received text response with invalid "
"ttt:0x%x", icp->conn_oid, ithp->ttt);
- return (ISCSI_STATUS_PROTOCOL_ERROR);
+ return (IDM_STATUS_PROTOCOL_ERROR);
}
/*
@@ -1610,7 +1576,36 @@
iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E3, isp);
mutex_exit(&icp->conn_queue_active.mutex);
+ return (IDM_STATUS_SUCCESS);
+}
+
+/*
+ * iscsi_rx_process_scsi_itt_to_icmdp - Lookup itt using IDM to find matching
+ * icmdp. Verify itt in hdr and icmdp are the same.
+ */
+static iscsi_status_t
+iscsi_rx_process_scsi_itt_to_icmdp(iscsi_sess_t *isp, idm_conn_t *ic,
+ iscsi_scsi_rsp_hdr_t *ihp, iscsi_cmd_t **icmdp)
+{
+ idm_task_t *itp;
+
+ ASSERT(isp != NULL);
+ ASSERT(ihp != NULL);
+ ASSERT(icmdp != NULL);
+ ASSERT(mutex_owned(&isp->sess_cmdsn_mutex));
+ itp = idm_task_find_and_complete(ic, ihp->itt, ISCSI_INI_TASK_TTT);
+ if (itp == NULL) {
+ cmn_err(CE_WARN, "iscsi session(%u) protocol error - "
+ "received unknown itt:0x%x - protocol error",
+ isp->sess_oid, ihp->itt);
+ return (ISCSI_STATUS_INTERNAL_ERROR);
+ }
+ *icmdp = itp->idt_private;
+
+ idm_task_rele(itp);
+
return (ISCSI_STATUS_SUCCESS);
+
}
/*
@@ -1630,7 +1625,7 @@
ASSERT(mutex_owned(&isp->sess_cmdsn_mutex));
/* try to find an associated iscsi_pkt */
- cmd_table_idx = ihp->itt % ISCSI_CMD_TABLE_SIZE;
+ cmd_table_idx = (ihp->itt - IDM_TASKIDS_MAX) % ISCSI_CMD_TABLE_SIZE;
if (isp->sess_cmd_table[cmd_table_idx] == NULL) {
cmn_err(CE_WARN, "iscsi session(%u) protocol error - "
"received unknown itt:0x%x - protocol error",
@@ -1662,7 +1657,6 @@
return (ISCSI_STATUS_SUCCESS);
}
-
/*
* +--------------------------------------------------------------------+
* | End of protocol receive routines |
@@ -1678,7 +1672,7 @@
/*
* iscsi_tx_thread - This thread is the driving point for all
- * iSCSI PDUs after login. No PDUs should call sendpdu()
+ * iSCSI PDUs after login. No PDUs should call idm_pdu_tx()
* directly they should be funneled through iscsi_tx_thread.
*/
void
@@ -1703,7 +1697,6 @@
while (ret != 0) {
isp->sess_window_open = B_TRUE;
-
/*
* While the window is open, there are commands available
* to send and the session state allows those commands to
@@ -1762,9 +1755,6 @@
case ISCSI_CMD_TYPE_SCSI:
rval = iscsi_tx_scsi(isp, icmdp);
break;
- case ISCSI_CMD_TYPE_R2T:
- rval = iscsi_tx_r2t(isp, icmdp);
- break;
case ISCSI_CMD_TYPE_NOP:
rval = iscsi_tx_nop(isp, icmdp);
break;
@@ -1781,6 +1771,8 @@
rval = iscsi_tx_text(isp, icmdp);
break;
default:
+ cmn_err(CE_WARN, "iscsi_tx_cmd: invalid cmdtype: %d",
+ icmdp->cmd_type);
ASSERT(FALSE);
}
@@ -1803,71 +1795,36 @@
#define ADDLHDRSZ(x) (sizeof (iscsi_addl_hdr_t) + (x) - \
16 - 4)
-/*
- * iscsi_tx_scsi -
- *
- */
-static iscsi_status_t
-iscsi_tx_scsi(iscsi_sess_t *isp, iscsi_cmd_t *icmdp)
+static void
+iscsi_tx_init_hdr(iscsi_sess_t *isp, iscsi_conn_t *icp,
+ iscsi_text_hdr_t *ihp, int opcode, uint32_t cmd_itt)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_conn_t *icp = NULL;
- struct scsi_pkt *pkt = NULL;
- struct buf *bp = NULL;
- union {
- iscsi_scsi_cmd_hdr_t isch;
- iscsi_addl_hdr_t iah;
- uchar_t arr[ADDLHDRSZ(DEF_CDB_LEN)];
- } hdr_un;
- iscsi_scsi_cmd_hdr_t *ihp =
- (iscsi_scsi_cmd_hdr_t *)&hdr_un.isch;
- int cdblen = 0;
- size_t buflen = 0;
- uint32_t imdata = 0;
- uint32_t first_burst_length = 0;
-
- ASSERT(isp != NULL);
- ASSERT(icmdp != NULL);
- pkt = icmdp->cmd_un.scsi.pkt;
- ASSERT(pkt != NULL);
- bp = icmdp->cmd_un.scsi.bp;
- icp = icmdp->cmd_conn;
- ASSERT(icp != NULL);
-
- /* Reset counts in case we are on a retry */
- icmdp->cmd_un.scsi.data_transferred = 0;
-
- if (icmdp->cmd_un.scsi.cmdlen > DEF_CDB_LEN) {
- cdblen = icmdp->cmd_un.scsi.cmdlen;
- ihp = kmem_zalloc(ADDLHDRSZ(cdblen), KM_SLEEP);
- } else {
- /*
- * only bzero the basic header; the additional header
- * will be set up correctly later, if needed
- */
- bzero(ihp, sizeof (iscsi_scsi_cmd_hdr_t));
- }
- ihp->opcode = ISCSI_OP_SCSI_CMD;
- ihp->itt = icmdp->cmd_itt;
+ ihp->opcode = opcode;
+ ihp->itt = cmd_itt;
mutex_enter(&isp->sess_cmdsn_mutex);
ihp->cmdsn = htonl(isp->sess_cmdsn);
isp->sess_cmdsn++;
mutex_exit(&isp->sess_cmdsn_mutex);
ihp->expstatsn = htonl(icp->conn_expstatsn);
icp->conn_laststatsn = icp->conn_expstatsn;
-
- pkt->pkt_state = (STATE_GOT_BUS | STATE_GOT_TARGET);
- pkt->pkt_reason = CMD_INCOMPLETE;
-
- /*
- * Sestion 12.11 of the iSCSI specification has a good table
- * describing when uncolicited data and/or immediate data
- * should be sent.
- */
+}
+
+
+static void
+iscsi_tx_scsi_data(iscsi_cmd_t *icmdp, iscsi_scsi_cmd_hdr_t *ihp,
+ iscsi_conn_t *icp, idm_pdu_t *pdu)
+{
+ struct buf *bp = NULL;
+ size_t buflen = 0;
+ uint32_t first_burst_length = 0;
+ struct scsi_pkt *pkt;
+
+ pkt = icmdp->cmd_un.scsi.pkt;
bp = icmdp->cmd_un.scsi.bp;
if ((bp != NULL) && bp->b_bcount) {
buflen = bp->b_bcount;
- first_burst_length = icp->conn_params.first_burst_length;
+ first_burst_length =
+ icp->conn_params.first_burst_length;
if (bp->b_flags & B_READ) {
ihp->flags = ISCSI_FLAG_FINAL;
@@ -1897,7 +1854,11 @@
/* Check if we should send ImmediateData */
if (icp->conn_params.immediate_data) {
- imdata = MIN(MIN(buflen,
+ pdu->isp_data =
+ (uint8_t *)icmdp->
+ cmd_un.scsi.bp->b_un.b_addr;
+
+ pdu->isp_datalen = MIN(MIN(buflen,
first_burst_length),
icmdp->cmd_conn->conn_params.
max_xmit_data_seg_len);
@@ -1907,21 +1868,63 @@
* we can send all burst data immediate
* (not unsol), set F
*/
- if ((imdata == buflen) ||
- (imdata == first_burst_length)) {
+ /*
+ * XXX This doesn't look right -- it's not
+ * clear how we can handle transmitting
+ * any unsolicited data. It looks like
+ * we only support immediate data. So what
+ * happens if we don't set ISCSI_FLAG_FINAL?
+ *
+ * Unless there's magic code somewhere that
+ * is sending the remaining PDU's we should
+ * simply set ISCSI_FLAG_FINAL and forget
+ * about sending unsolicited data. The big
+ * win is the immediate data anyway for small
+ * PDU's.
+ */
+ if ((pdu->isp_datalen == buflen) ||
+ (pdu->isp_datalen == first_burst_length)) {
ihp->flags |= ISCSI_FLAG_FINAL;
}
- hton24(ihp->dlength, imdata);
+ hton24(ihp->dlength, pdu->isp_datalen);
}
-
/* total data transfer length */
ihp->data_length = htonl(buflen);
}
} else {
ihp->flags = ISCSI_FLAG_FINAL;
- buflen = 0;
}
+ icmdp->cmd_un.scsi.data_transferred += pdu->isp_datalen;
+ /* XXX How is this different from the code above? */
+ /* will idm send the next data command up to burst length? */
+ /* send the burstlen if we haven't sent immediate data */
+ /* CRM: should idm send difference min(buflen, first_burst) and imm? */
+ /* (MIN(first_burst_length, buflen) - imdata > 0) */
+ /* CRM_LATER: change this to generate unsolicited pdu */
+ if ((buflen > 0) &&
+ ((bp->b_flags & B_READ) == 0) &&
+ (icp->conn_params.initial_r2t == 0) &&
+ pdu->isp_datalen == 0) {
+
+ pdu->isp_datalen = MIN(first_burst_length, buflen);
+ if ((pdu->isp_datalen == buflen) ||
+ (pdu->isp_datalen == first_burst_length)) {
+ ihp->flags |= ISCSI_FLAG_FINAL;
+ }
+ pdu->isp_data = (uint8_t *)icmdp->cmd_un.scsi.bp->b_un.b_addr;
+ hton24(ihp->dlength, pdu->isp_datalen);
+ }
+}
+
+static void
+iscsi_tx_scsi_init_pkt(iscsi_cmd_t *icmdp, iscsi_scsi_cmd_hdr_t *ihp)
+{
+ struct scsi_pkt *pkt;
+
+ pkt = icmdp->cmd_un.scsi.pkt;
+ pkt->pkt_state = (STATE_GOT_BUS | STATE_GOT_TARGET);
+ pkt->pkt_reason = CMD_INCOMPLETE;
/* tagged queuing */
if (pkt->pkt_flags & FLAG_HTAG) {
@@ -1962,204 +1965,169 @@
* Update all values before transfering.
* We should never touch the icmdp after
* transfering if there is no more data
- * to send. The only case the sendpdu()
+ * to send. The only case the idm_pdu_tx()
* will fail is a on a connection disconnect
* in that case the command will be flushed.
*/
pkt->pkt_state |= STATE_SENT_CMD;
-
- icmdp->cmd_un.scsi.data_transferred += imdata;
+}
+
+static void
+iscsi_tx_scsi_init_task(iscsi_cmd_t *icmdp, iscsi_conn_t *icp,
+ iscsi_scsi_cmd_hdr_t *ihp)
+{
+ idm_task_t *itp;
+ struct buf *bp = NULL;
+ uint32_t data_length;
+
+ bp = icmdp->cmd_un.scsi.bp;
+
+ itp = icmdp->cmd_itp;
+ ASSERT(itp != NULL);
+ data_length = ntohl(ihp->data_length);
+ ISCSI_IO_LOG(CE_NOTE,
+ "DEBUG: iscsi_tx_init_task: task_start: %p idt_tt: %x cmdsn: %x "
+ "sess_cmdsn: %x cmd: %p "
+ "cmdtype: %d datalen: %u",
+ (void *)itp, itp->idt_tt, ihp->cmdsn, icp->conn_sess->sess_cmdsn,
+ (void *)icmdp, icmdp->cmd_type, data_length);
+ if (data_length > 0) {
+ if (bp->b_flags & B_READ) {
+ icmdp->cmd_un.scsi.ibp_ibuf =
+ idm_buf_alloc(icp->conn_ic,
+ bp->b_un.b_addr, bp->b_bcount);
+ if (icmdp->cmd_un.scsi.ibp_ibuf)
+ idm_buf_bind_in(itp,
+ icmdp->cmd_un.scsi.ibp_ibuf);
+ } else {
+ icmdp->cmd_un.scsi.ibp_obuf =
+ idm_buf_alloc(icp->conn_ic,
+ bp->b_un.b_addr, bp->b_bcount);
+ if (icmdp->cmd_un.scsi.ibp_obuf)
+ idm_buf_bind_out(itp,
+ icmdp->cmd_un.scsi.ibp_obuf);
+ }
+ ISCSI_IO_LOG(CE_NOTE,
+ "DEBUG: pdu_tx: task_start(%s): %p ic: %p idt_tt: %x "
+ "cmdsn: %x sess_cmdsn: %x sess_expcmdsn: %x obuf: %p "
+ "cmdp: %p cmdtype: %d "
+ "buflen: %lu " "bpaddr: %p datalen: %u ",
+ bp->b_flags & B_READ ? "B_READ" : "B_WRITE",
+ (void *)itp, (void *)icp->conn_ic,
+ itp->idt_tt, ihp->cmdsn,
+ icp->conn_sess->sess_cmdsn,
+ icp->conn_sess->sess_expcmdsn,
+ (void *)icmdp->cmd_un.scsi.ibp_ibuf,
+ (void *)icmdp, icmdp->cmd_type, bp->b_bcount,
+ (void *)bp->b_un.b_addr,
+ data_length);
+ }
/*
- * Check if there is additional data to transfer beyond what
- * will be sent as part of the initial command. If InitialR2T
- * is disabled then we should fake up a R2T so all the data,
- * up to first burst length, is sent in an unsolicited
- * fashion. We have already sent as much immediate data
- * as possible.
+ * Task is now active
*/
- if ((buflen > 0) &&
- ((bp->b_flags & B_READ) == 0) &&
- (icp->conn_params.initial_r2t == 0) &&
- (MIN(first_burst_length, buflen) - imdata > 0)) {
-
- uint32_t xfer_len = MIN(first_burst_length, buflen) - imdata;
- /* data will be chunked at tx */
- iscsi_handle_r2t(icp, icmdp, imdata,
- xfer_len, ISCSI_RSVD_TASK_TAG);
- }
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- /* Transfer Cmd PDU */
- if (imdata) {
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)ihp, icmdp->cmd_un.scsi.bp->b_un.b_addr,
- ISCSI_CONN_TO_NET_DIGEST(icp));
- if (ISCSI_SUCCESS(rval)) {
- KSTAT_ADD_CONN_TX_BYTES(icp, imdata);
- }
- } else {
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)ihp, NULL,
- ISCSI_CONN_TO_NET_DIGEST(icp));
- }
- if (cdblen) {
- kmem_free(ihp, ADDLHDRSZ(cdblen));
- }
-
- return (rval);
+ idm_task_start(itp, ISCSI_INI_TASK_TTT);
}
-
/*
- * iscsi_tx_r2t -
+ * iscsi_tx_scsi -
*
*/
static iscsi_status_t
-iscsi_tx_r2t(iscsi_sess_t *isp, iscsi_cmd_t *icmdp)
+iscsi_tx_scsi(iscsi_sess_t *isp, iscsi_cmd_t *icmdp)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- iscsi_conn_t *icp = NULL;
- iscsi_cmd_t *orig_icmdp = NULL;
+ iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
+ iscsi_conn_t *icp = NULL;
+ struct scsi_pkt *pkt = NULL;
+ iscsi_scsi_cmd_hdr_t *ihp = NULL;
+ int cdblen = 0;
+ idm_pdu_t *pdu;
+ int len;
ASSERT(isp != NULL);
ASSERT(icmdp != NULL);
+
+ pdu = kmem_zalloc(sizeof (idm_pdu_t), KM_SLEEP);
+
+ pkt = icmdp->cmd_un.scsi.pkt;
+ ASSERT(pkt != NULL);
icp = icmdp->cmd_conn;
- ASSERT(icp);
- orig_icmdp = icmdp->cmd_un.r2t.icmdp;
- ASSERT(orig_icmdp);
-
- /* validate the offset and length against the buffer size */
- if ((icmdp->cmd_un.r2t.offset + icmdp->cmd_un.r2t.length) >
- orig_icmdp->cmd_un.scsi.bp->b_bcount) {
- cmn_err(CE_WARN, "iscsi session(%u) ignoring invalid r2t "
- "for icmd itt:0x%x offset:0x%x length:0x%x bufsize:0x%lx",
- isp->sess_oid, icmdp->cmd_itt, icmdp->cmd_un.r2t.offset,
- icmdp->cmd_un.r2t.length, orig_icmdp->cmd_un.scsi.bp->
- b_bcount);
- mutex_exit(&isp->sess_queue_pending.mutex);
- return (ISCSI_STATUS_INTERNAL_ERROR);
+ ASSERT(icp != NULL);
+
+ /* Reset counts in case we are on a retry */
+ icmdp->cmd_un.scsi.data_transferred = 0;
+
+ if (icmdp->cmd_un.scsi.cmdlen > DEF_CDB_LEN) {
+ cdblen = icmdp->cmd_un.scsi.cmdlen;
+ ihp = kmem_zalloc(ADDLHDRSZ(cdblen), KM_SLEEP);
+ len = ADDLHDRSZ(cdblen);
+ } else {
+ /*
+ * only bzero the basic header; the additional header
+ * will be set up correctly later, if needed
+ */
+ ihp = kmem_zalloc(sizeof (iscsi_scsi_cmd_hdr_t), KM_SLEEP);
+ len = sizeof (iscsi_scsi_cmd_hdr_t);
}
- ASSERT(orig_icmdp->cmd_un.scsi.r2t_icmdp);
-
- rval = iscsi_tx_data(isp, icp, orig_icmdp, icmdp->cmd_ttt,
- icmdp->cmd_un.r2t.length, icmdp->cmd_un.r2t.offset);
-
- mutex_enter(&orig_icmdp->cmd_mutex);
- orig_icmdp->cmd_un.scsi.r2t_icmdp = NULL;
- icmdp->cmd_un.r2t.icmdp = NULL;
+
+ iscsi_tx_init_hdr(isp, icp, (iscsi_text_hdr_t *)ihp,
+ ISCSI_OP_SCSI_CMD, icmdp->cmd_itt);
+
+ idm_pdu_init(pdu, icp->conn_ic, (void *)icmdp, &iscsi_tx_done);
+ idm_pdu_init_hdr(pdu, (uint8_t *)ihp, len);
+ pdu->isp_data = NULL;
+ pdu->isp_datalen = 0;
+
/*
- * we're finished with this r2t; there could be another r2t
- * waiting on us to finish, so signal it.
+ * Section 12.11 of the iSCSI specification has a good table
+ * describing when unsolicited data and/or immediate data
+ * should be sent.
*/
- cv_broadcast(&orig_icmdp->cmd_completion);
- mutex_exit(&orig_icmdp->cmd_mutex);
- /*
- * the parent command may be waiting for us to finish; if so,
- * wake the _ic_ thread
- */
- if ((orig_icmdp->cmd_state == ISCSI_CMD_STATE_COMPLETED) &&
- (ISCSI_SESS_STATE_FULL_FEATURE(isp->sess_state)) &&
- (orig_icmdp->cmd_un.scsi.r2t_more == B_FALSE))
- iscsi_thread_send_wakeup(isp->sess_ic_thread);
- ASSERT(!mutex_owned(&isp->sess_queue_pending.mutex));
+
+ iscsi_tx_scsi_data(icmdp, ihp, icp, pdu);
+
+ iscsi_tx_scsi_init_pkt(icmdp, ihp);
+
+ /* Calls idm_task_start */
+ iscsi_tx_scsi_init_task(icmdp, icp, ihp);
+
+ mutex_exit(&isp->sess_queue_pending.mutex);
+
+ idm_pdu_tx(pdu);
+
return (rval);
}
-/*
- * iscsi_tx_data -
- */
-static iscsi_status_t
-iscsi_tx_data(iscsi_sess_t *isp, iscsi_conn_t *icp, iscsi_cmd_t *icmdp,
- uint32_t ttt, size_t datalen, size_t offset)
+/* ARGSUSED */
+static void
+iscsi_tx_done(idm_pdu_t *pdu, idm_status_t status)
+{
+ kmem_free((iscsi_hdr_t *)pdu->isp_hdr, pdu->isp_hdrlen);
+ kmem_free(pdu, sizeof (idm_pdu_t));
+}
+
+
+static void
+iscsi_tx_pdu(iscsi_conn_t *icp, int opcode, void *hdr, int hdrlen,
+ iscsi_cmd_t *icmdp)
{
- iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
- struct buf *bp = NULL;
- size_t remainder = 0;
- size_t chunk = 0;
- char *data = NULL;
- uint32_t data_sn = 0;
- iscsi_data_hdr_t idhp;
- uint32_t itt;
- uint32_t lun;
-
- ASSERT(isp != NULL);
- ASSERT(icp != NULL);
- ASSERT(icmdp != NULL);
- bp = icmdp->cmd_un.scsi.bp;
-
- /* verify there is data to send */
- if (bp == NULL) {
- mutex_exit(&isp->sess_queue_pending.mutex);
- return (ISCSI_STATUS_INTERNAL_ERROR);
+ idm_pdu_t *tx_pdu;
+ iscsi_hdr_t *ihp = (iscsi_hdr_t *)hdr;
+
+ tx_pdu = kmem_zalloc(sizeof (idm_pdu_t), KM_SLEEP);
+ ASSERT(tx_pdu != NULL);
+
+ idm_pdu_init(tx_pdu, icp->conn_ic, icmdp, &iscsi_tx_done);
+ idm_pdu_init_hdr(tx_pdu, hdr, hdrlen);
+ if (opcode == ISCSI_OP_TEXT_CMD) {
+ idm_pdu_init_data(tx_pdu,
+ (uint8_t *)icmdp->cmd_un.text.buf,
+ ntoh24(ihp->dlength));
}
- itt = icmdp->cmd_itt;
- lun = icmdp->cmd_un.scsi.lun;
-
- /*
- * update the LUN with the amount of data we will
- * transfer. If there is a failure it's because of
- * a network fault and the command will get flushed.
- */
- icmdp->cmd_un.scsi.data_transferred += datalen;
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- remainder = datalen;
- while (remainder) {
-
- /* Check so see if we need to chunk the data */
- if ((icp->conn_params.max_xmit_data_seg_len > 0) &&
- (remainder > icp->conn_params.max_xmit_data_seg_len)) {
- chunk = icp->conn_params.max_xmit_data_seg_len;
- } else {
- chunk = remainder;
- }
-
- /* setup iscsi data hdr */
- bzero(&idhp, sizeof (iscsi_data_hdr_t));
- idhp.opcode = ISCSI_OP_SCSI_DATA;
- idhp.itt = itt;
- idhp.ttt = ttt;
- ISCSI_LUN_BYTE_COPY(idhp.lun, lun);
- idhp.expstatsn = htonl(icp->conn_expstatsn);
- icp->conn_laststatsn = icp->conn_expstatsn;
- idhp.datasn = htonl(data_sn);
- data_sn++;
- idhp.offset = htonl(offset);
- hton24(idhp.dlength, chunk);
-
- if (chunk == remainder) {
- idhp.flags = ISCSI_FLAG_FINAL; /* final chunk */
- }
-
- /* setup data */
- data = bp->b_un.b_addr + offset;
-
- /*
- * Keep track of how much data we have
- * transfer so far and how much is remaining.
- */
- remainder -= chunk;
- offset += chunk;
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&idhp, data,
- ISCSI_CONN_TO_NET_DIGEST(icp));
-
- if (ISCSI_SUCCESS(rval)) {
- KSTAT_ADD_CONN_TX_BYTES(icp, chunk);
- } else {
- break;
- }
- }
-
- return (rval);
+ mutex_exit(&icp->conn_sess->sess_queue_pending.mutex);
+ idm_pdu_tx(tx_pdu);
}
@@ -2172,31 +2140,27 @@
{
iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_conn_t *icp = NULL;
- iscsi_nop_out_hdr_t inohp;
+ iscsi_nop_out_hdr_t *inohp;
ASSERT(isp != NULL);
ASSERT(icmdp != NULL);
icp = icmdp->cmd_conn;
ASSERT(icp != NULL);
- bzero(&inohp, sizeof (iscsi_nop_out_hdr_t));
- inohp.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
- inohp.flags = ISCSI_FLAG_FINAL;
- inohp.itt = icmdp->cmd_itt;
- inohp.ttt = icmdp->cmd_ttt;
+ inohp = kmem_zalloc(sizeof (iscsi_nop_out_hdr_t), KM_SLEEP);
+ ASSERT(inohp != NULL);
+
+ inohp->opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
+ inohp->flags = ISCSI_FLAG_FINAL;
+ inohp->itt = icmdp->cmd_itt;
+ inohp->ttt = icmdp->cmd_ttt;
mutex_enter(&isp->sess_cmdsn_mutex);
- inohp.cmdsn = htonl(isp->sess_cmdsn);
+ inohp->cmdsn = htonl(isp->sess_cmdsn);
mutex_exit(&isp->sess_cmdsn_mutex);
- inohp.expstatsn = htonl(icp->conn_expstatsn);
+ inohp->expstatsn = htonl(icp->conn_expstatsn);
icp->conn_laststatsn = icp->conn_expstatsn;
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&inohp, NULL,
- ISCSI_CONN_TO_NET_DIGEST(icp));
-
+ iscsi_tx_pdu(icp, ISCSI_OP_NOOP_OUT, inohp,
+ sizeof (iscsi_nop_out_hdr_t), icmdp);
return (rval);
}
@@ -2210,32 +2174,28 @@
{
iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_conn_t *icp = NULL;
- iscsi_scsi_task_mgt_hdr_t istmh;
+ iscsi_scsi_task_mgt_hdr_t *istmh;
ASSERT(isp != NULL);
ASSERT(icmdp != NULL);
icp = icmdp->cmd_conn;
ASSERT(icp != NULL);
- bzero(&istmh, sizeof (iscsi_scsi_task_mgt_hdr_t));
+ istmh = kmem_zalloc(sizeof (iscsi_scsi_task_mgt_hdr_t), KM_SLEEP);
+ ASSERT(istmh != NULL);
mutex_enter(&isp->sess_cmdsn_mutex);
- istmh.cmdsn = htonl(isp->sess_cmdsn);
+ istmh->cmdsn = htonl(isp->sess_cmdsn);
mutex_exit(&isp->sess_cmdsn_mutex);
- istmh.expstatsn = htonl(icp->conn_expstatsn);
+ istmh->expstatsn = htonl(icp->conn_expstatsn);
icp->conn_laststatsn = icp->conn_expstatsn;
- istmh.itt = icmdp->cmd_itt;
- istmh.opcode = ISCSI_OP_SCSI_TASK_MGT_MSG | ISCSI_OP_IMMEDIATE;
- istmh.function = ISCSI_FLAG_FINAL | ISCSI_TM_FUNC_ABORT_TASK;
- ISCSI_LUN_BYTE_COPY(istmh.lun,
+ istmh->itt = icmdp->cmd_itt;
+ istmh->opcode = ISCSI_OP_SCSI_TASK_MGT_MSG | ISCSI_OP_IMMEDIATE;
+ istmh->function = ISCSI_FLAG_FINAL | ISCSI_TM_FUNC_ABORT_TASK;
+ ISCSI_LUN_BYTE_COPY(istmh->lun,
icmdp->cmd_un.abort.icmdp->cmd_un.scsi.lun);
- istmh.rtt = icmdp->cmd_un.abort.icmdp->cmd_itt;
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&istmh, NULL,
- ISCSI_CONN_TO_NET_DIGEST(icp));
+ istmh->rtt = icmdp->cmd_un.abort.icmdp->cmd_itt;
+ iscsi_tx_pdu(icp, ISCSI_OP_SCSI_TASK_MGT_MSG, istmh,
+ sizeof (iscsi_scsi_task_mgt_hdr_t), icmdp);
return (rval);
}
@@ -2250,30 +2210,31 @@
{
iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_conn_t *icp = NULL;
- iscsi_scsi_task_mgt_hdr_t istmh;
+ iscsi_scsi_task_mgt_hdr_t *istmh;
ASSERT(isp != NULL);
ASSERT(icmdp != NULL);
icp = icmdp->cmd_conn;
ASSERT(icp != NULL);
- bzero(&istmh, sizeof (iscsi_scsi_task_mgt_hdr_t));
- istmh.opcode = ISCSI_OP_SCSI_TASK_MGT_MSG | ISCSI_OP_IMMEDIATE;
+ istmh = kmem_zalloc(sizeof (iscsi_scsi_task_mgt_hdr_t), KM_SLEEP);
+ ASSERT(istmh != NULL);
+ istmh->opcode = ISCSI_OP_SCSI_TASK_MGT_MSG | ISCSI_OP_IMMEDIATE;
mutex_enter(&isp->sess_cmdsn_mutex);
- istmh.cmdsn = htonl(isp->sess_cmdsn);
+ istmh->cmdsn = htonl(isp->sess_cmdsn);
mutex_exit(&isp->sess_cmdsn_mutex);
- istmh.expstatsn = htonl(icp->conn_expstatsn);
- istmh.itt = icmdp->cmd_itt;
+ istmh->expstatsn = htonl(icp->conn_expstatsn);
+ istmh->itt = icmdp->cmd_itt;
switch (icmdp->cmd_un.reset.level) {
case RESET_LUN:
- istmh.function = ISCSI_FLAG_FINAL |
+ istmh->function = ISCSI_FLAG_FINAL |
ISCSI_TM_FUNC_LOGICAL_UNIT_RESET;
- ISCSI_LUN_BYTE_COPY(istmh.lun, icmdp->cmd_lun->lun_num);
+ ISCSI_LUN_BYTE_COPY(istmh->lun, icmdp->cmd_lun->lun_num);
break;
case RESET_TARGET:
case RESET_BUS:
- istmh.function = ISCSI_FLAG_FINAL |
+ istmh->function = ISCSI_FLAG_FINAL |
ISCSI_TM_FUNC_TARGET_WARM_RESET;
break;
default:
@@ -2282,12 +2243,8 @@
break;
}
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&istmh, NULL,
- ISCSI_CONN_TO_NET_DIGEST(icp));
+ iscsi_tx_pdu(icp, ISCSI_OP_SCSI_TASK_MGT_MSG, istmh,
+ sizeof (iscsi_scsi_task_mgt_hdr_t), icmdp);
return (rval);
}
@@ -2302,29 +2259,24 @@
{
iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_conn_t *icp = NULL;
- iscsi_logout_hdr_t ilh;
+ iscsi_logout_hdr_t *ilh;
ASSERT(isp != NULL);
ASSERT(icmdp != NULL);
icp = icmdp->cmd_conn;
ASSERT(icp != NULL);
- bzero(&ilh, sizeof (iscsi_logout_hdr_t));
- ilh.opcode = ISCSI_OP_LOGOUT_CMD | ISCSI_OP_IMMEDIATE;
- ilh.flags = ISCSI_FLAG_FINAL | ISCSI_LOGOUT_REASON_CLOSE_SESSION;
- ilh.itt = icmdp->cmd_itt;
- ilh.cid = icp->conn_cid;
+ ilh = kmem_zalloc(sizeof (iscsi_logout_hdr_t), KM_SLEEP);
+ ilh->opcode = ISCSI_OP_LOGOUT_CMD | ISCSI_OP_IMMEDIATE;
+ ilh->flags = ISCSI_FLAG_FINAL | ISCSI_LOGOUT_REASON_CLOSE_SESSION;
+ ilh->itt = icmdp->cmd_itt;
+ ilh->cid = icp->conn_cid;
mutex_enter(&isp->sess_cmdsn_mutex);
- ilh.cmdsn = htonl(isp->sess_cmdsn);
+ ilh->cmdsn = htonl(isp->sess_cmdsn);
mutex_exit(&isp->sess_cmdsn_mutex);
- ilh.expstatsn = htonl(icp->conn_expstatsn);
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&ilh, NULL,
- ISCSI_CONN_TO_NET_DIGEST(icp));
+ ilh->expstatsn = htonl(icp->conn_expstatsn);
+ iscsi_tx_pdu(icp, ISCSI_OP_LOGOUT_CMD, ilh,
+ sizeof (iscsi_logout_hdr_t), icmdp);
return (rval);
}
@@ -2346,32 +2298,23 @@
{
iscsi_status_t rval = ISCSI_STATUS_SUCCESS;
iscsi_conn_t *icp = NULL;
- iscsi_text_hdr_t ith;
-
- ASSERT(isp != NULL);
+ iscsi_text_hdr_t *ith;
+
ASSERT(icmdp != NULL);
icp = icmdp->cmd_conn;
ASSERT(icp != NULL);
- bzero(&ith, sizeof (iscsi_text_hdr_t));
- ith.opcode = ISCSI_OP_TEXT_CMD;
- ith.flags = ISCSI_FLAG_FINAL;
- hton24(ith.dlength, icmdp->cmd_un.text.data_len);
- ith.itt = icmdp->cmd_itt;
- ith.ttt = icmdp->cmd_un.text.ttt;
- mutex_enter(&isp->sess_cmdsn_mutex);
- ith.cmdsn = htonl(isp->sess_cmdsn);
- isp->sess_cmdsn++;
- ith.expstatsn = htonl(icp->conn_expstatsn);
- mutex_exit(&isp->sess_cmdsn_mutex);
- bcopy(icmdp->cmd_un.text.lun, ith.rsvd4, sizeof (ith.rsvd4));
-
- /* release pending queue mutex across the network call */
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- rval = iscsi_net->sendpdu(icp->conn_socket,
- (iscsi_hdr_t *)&ith, icmdp->cmd_un.text.buf,
- ISCSI_CONN_TO_NET_DIGEST(icp));
+ ith = kmem_zalloc(sizeof (iscsi_text_hdr_t), KM_SLEEP);
+ ASSERT(ith != NULL);
+ ith->flags = ISCSI_FLAG_FINAL;
+ hton24(ith->dlength, icmdp->cmd_un.text.data_len);
+ ith->ttt = icmdp->cmd_un.text.ttt;
+ iscsi_tx_init_hdr(isp, icp, (iscsi_text_hdr_t *)ith,
+ ISCSI_OP_TEXT_CMD, icmdp->cmd_itt);
+ bcopy(icmdp->cmd_un.text.lun, ith->rsvd4, sizeof (ith->rsvd4));
+
+ iscsi_tx_pdu(icp, ISCSI_OP_TEXT_CMD, ith, sizeof (iscsi_text_hdr_t),
+ icmdp);
return (rval);
}
@@ -2383,86 +2326,6 @@
*/
/*
- * iscsi_handle_r2t - Create a R2T and put it into the pending queue.
- *
- * Since the rx thread can hold the pending mutex, the tx thread or wd
- * thread may not have chance to check the commands in the pending queue.
- * So if the previous R2T is still there, we will release the related
- * mutex and wait for its completion in case of deadlock.
- */
-static void
-iscsi_handle_r2t(iscsi_conn_t *icp, iscsi_cmd_t *icmdp,
- uint32_t offset, uint32_t length, uint32_t ttt)
-{
- iscsi_sess_t *isp = NULL;
- iscsi_cmd_t *new_icmdp = NULL;
- int owned = 0;
-
- ASSERT(icp != NULL);
- isp = icp->conn_sess;
- ASSERT(isp != NULL);
-
- if (icmdp->cmd_un.scsi.r2t_icmdp != NULL) {
- /*
- * Occasionally the tx thread doesn't have a chance to
- * send commands when we hold the pending mutex.
- * So we should mark this scsi command with more R2T
- * and release the mutex. Then wait for completion.
- */
- icmdp->cmd_un.scsi.r2t_more = B_TRUE;
-
- mutex_exit(&icmdp->cmd_mutex);
- owned = mutex_owned(&icp->conn_queue_active.mutex);
- if (owned != 0) {
- mutex_exit(&icp->conn_queue_active.mutex);
- }
- mutex_exit(&isp->sess_queue_pending.mutex);
-
- /*
- * the transmission from a previous r2t can be
- * slow to return; the array may have sent
- * another r2t at this point, so wait until
- * the first one finishes and signals us.
- */
- mutex_enter(&icmdp->cmd_mutex);
- while (icmdp->cmd_un.scsi.r2t_icmdp != NULL) {
- ASSERT(icmdp->cmd_state != ISCSI_CMD_STATE_COMPLETED);
- cv_wait(&icmdp->cmd_completion, &icmdp->cmd_mutex);
- }
- mutex_exit(&icmdp->cmd_mutex);
-
- mutex_enter(&isp->sess_queue_pending.mutex);
- if (owned != 0) {
- mutex_enter(&icp->conn_queue_active.mutex);
- }
- mutex_enter(&icmdp->cmd_mutex);
- }
-
- /*
- * try to create an R2T task to send it later. If we can't,
- * we're screwed, and the command will eventually time out
- * and be retried by the SCSI layer.
- */
- new_icmdp = iscsi_cmd_alloc(icp, KM_SLEEP);
- new_icmdp->cmd_type = ISCSI_CMD_TYPE_R2T;
- new_icmdp->cmd_un.r2t.icmdp = icmdp;
- new_icmdp->cmd_un.r2t.offset = offset;
- new_icmdp->cmd_un.r2t.length = length;
- new_icmdp->cmd_ttt = ttt;
- new_icmdp->cmd_itt = icmdp->cmd_itt;
- new_icmdp->cmd_lun = icmdp->cmd_lun;
- icmdp->cmd_un.scsi.r2t_icmdp = new_icmdp;
- icmdp->cmd_un.scsi.r2t_more = B_FALSE;
-
- /*
- * pending queue mutex is already held by the
- * tx_thread or rtt_rsp function.
- */
- iscsi_cmd_state_machine(new_icmdp, ISCSI_CMD_EVENT_E1, isp);
-}
-
-
-/*
* iscsi_handle_abort -
*
*/
@@ -2494,6 +2357,40 @@
iscsi_cmd_state_machine(new_icmdp, ISCSI_CMD_EVENT_E1, isp);
}
+/*
+ * Callback from IDM indicating that the task has been suspended or aborted.
+ */
+void
+iscsi_task_aborted(idm_task_t *idt, idm_status_t status)
+{
+ iscsi_cmd_t *icmdp = idt->idt_private;
+ iscsi_conn_t *icp = icmdp->cmd_conn;
+ iscsi_sess_t *isp = icp->conn_sess;
+
+ ASSERT(icmdp->cmd_conn != NULL);
+
+ switch (status) {
+ case IDM_STATUS_SUSPENDED:
+ /*
+ * If the task is suspended, it may be aborted later,
+ * so we can ignore this notification.
+ */
+ break;
+
+ case IDM_STATUS_ABORTED:
+ mutex_enter(&icp->conn_queue_active.mutex);
+ iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E9, isp);
+ mutex_exit(&icp->conn_queue_active.mutex);
+ break;
+
+ default:
+ /*
+ * Unexpected status.
+ */
+ ASSERT(0);
+ }
+
+}
/*
* iscsi_handle_nop -
@@ -2604,6 +2501,21 @@
return (rval);
}
+/*
+ * iscsi_logout_start - task handler for deferred logout
+ */
+static void
+iscsi_logout_start(void *arg)
+{
+ iscsi_task_t *itp = (iscsi_task_t *)arg;
+ iscsi_conn_t *icp;
+
+ icp = (iscsi_conn_t *)itp->t_arg;
+
+ mutex_enter(&icp->conn_state_mutex);
+ (void) iscsi_handle_logout(icp);
+ mutex_exit(&icp->conn_state_mutex);
+}
/*
* iscsi_handle_logout - This function will issue a logout for
@@ -2613,13 +2525,23 @@
iscsi_handle_logout(iscsi_conn_t *icp)
{
iscsi_sess_t *isp;
+ idm_conn_t *ic;
iscsi_cmd_t *icmdp;
int rval;
ASSERT(icp != NULL);
isp = icp->conn_sess;
+ ic = icp->conn_ic;
ASSERT(isp != NULL);
ASSERT(isp->sess_hba != NULL);
+ ASSERT(mutex_owned(&icp->conn_state_mutex));
+
+ /*
+ * We may want to explicitly disconnect if something goes wrong so
+ * grab a hold to ensure that the IDM connection context can't
+ * disappear.
+ */
+ idm_conn_hold(ic);
icmdp = iscsi_cmd_alloc(icp, KM_SLEEP);
ASSERT(icmdp != NULL);
@@ -2651,21 +2573,33 @@
/* copy rval */
rval = icmdp->cmd_result;
- /*
- * another way to do this would be to send t17 unconditionally,
- * but then the _rx_ thread would get bumped out with a receive
- * error, and send another t17.
- */
- if (rval != ISCSI_STATUS_SUCCESS) {
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T17);
- }
-
/* clean up */
iscsi_cmd_free(icmdp);
+ if (rval != 0) {
+ /* If the logout failed then drop the connection */
+ idm_ini_conn_disconnect(icp->conn_ic);
+ }
+
+ /* stall until connection settles */
+ while ((icp->conn_state != ISCSI_CONN_STATE_FREE) &&
+ (icp->conn_state != ISCSI_CONN_STATE_FAILED) &&
+ (icp->conn_state != ISCSI_CONN_STATE_POLLING)) {
+ /* wait for transition */
+ cv_wait(&icp->conn_state_change, &icp->conn_state_mutex);
+ }
+
+ idm_conn_rele(ic);
+
+ /*
+ * Return value reflects whether the logout command completed --
+ * regardless of the return value the connection is closed and
+ * ready for reconnection.
+ */
return (rval);
}
+
/*
* iscsi_handle_text - main control function for iSCSI text requests. This
* function handles allocating the command, sending initial text request, and
@@ -2930,6 +2864,132 @@
}
/*
+ * IDM callbacks
+ */
+void
+iscsi_build_hdr(idm_task_t *idm_task, idm_pdu_t *pdu, uint8_t opcode)
+{
+ iscsi_cmd_t *icmdp = idm_task->idt_private;
+ iscsi_conn_t *icp = icmdp->cmd_conn;
+ iscsi_data_hdr_t *ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
+
+ mutex_enter(&icmdp->cmd_mutex);
+ if (opcode == ISCSI_OP_SCSI_DATA) {
+ uint32_t data_sn;
+ uint32_t lun;
+ icmdp = idm_task->idt_private;
+ icp = icmdp->cmd_conn;
+ ihp->opcode = opcode;
+ ihp->itt = icmdp->cmd_itt;
+ ihp->ttt = idm_task->idt_r2t_ttt;
+ ihp->expstatsn = htonl(icp->conn_expstatsn);
+ icp->conn_laststatsn = icp->conn_expstatsn;
+ data_sn = ntohl(ihp->datasn);
+ data_sn++;
+ lun = icmdp->cmd_un.scsi.lun;
+ ISCSI_LUN_BYTE_COPY(ihp->lun, lun);
+ /* CRM: update_flow_control */
+ ISCSI_IO_LOG(CE_NOTE, "DEBUG: iscsi_build_hdr"
+ "(ISCSI_OP_SCSI_DATA): task: %p icp: %p ic: %p itt: %x "
+ "exp: %d data_sn: %d", (void *)idm_task, (void *)icp,
+ (void *)icp->conn_ic, ihp->itt, icp->conn_expstatsn,
+ data_sn);
+ } else {
+ cmn_err(CE_WARN, "iscsi_build_hdr: unprocessed build "
+ "header opcode: %x", opcode);
+ }
+ mutex_exit(&icmdp->cmd_mutex);
+}
+
+static void
+iscsi_process_rsp_status(iscsi_sess_t *isp, iscsi_conn_t *icp,
+ idm_status_t status)
+{
+ switch (status) {
+ case IDM_STATUS_SUCCESS:
+ if ((isp->sess_state == ISCSI_SESS_STATE_IN_FLUSH) &&
+ (icp->conn_queue_active.count == 0)) {
+ iscsi_drop_conn_cleanup(icp);
+ }
+ break;
+ case IDM_STATUS_PROTOCOL_ERROR:
+ KSTAT_INC_CONN_ERR_PROTOCOL(icp);
+ iscsi_drop_conn_cleanup(icp);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+iscsi_drop_conn_cleanup(iscsi_conn_t *icp) {
+ mutex_enter(&icp->conn_state_mutex);
+ idm_ini_conn_disconnect(icp->conn_ic);
+ mutex_exit(&icp->conn_state_mutex);
+}
+
+void
+iscsi_rx_error_pdu(idm_conn_t *ic, idm_pdu_t *pdu, idm_status_t status)
+{
+ iscsi_conn_t *icp = (iscsi_conn_t *)ic->ic_handle;
+ iscsi_sess_t *isp;
+
+ ASSERT(icp != NULL);
+ isp = icp->conn_sess;
+ ASSERT(isp != NULL);
+ iscsi_process_rsp_status(isp, icp, status);
+ idm_pdu_complete(pdu, status);
+}
+
+void
+iscsi_rx_misc_pdu(idm_conn_t *ic, idm_pdu_t *pdu)
+{
+ iscsi_conn_t *icp;
+ iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
+ iscsi_sess_t *isp;
+ idm_status_t status;
+
+ icp = ic->ic_handle;
+ isp = icp->conn_sess;
+ isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
+ switch (ihp->opcode & ISCSI_OPCODE_MASK) {
+ case ISCSI_OP_LOGIN_RSP:
+ status = iscsi_rx_process_login_pdu(ic, pdu);
+ idm_pdu_complete(pdu, status);
+ break;
+ case ISCSI_OP_LOGOUT_RSP:
+ status = iscsi_rx_process_logout_rsp(ic, pdu);
+ idm_pdu_complete(pdu, status);
+ break;
+ case ISCSI_OP_REJECT_MSG:
+ status = iscsi_rx_process_reject_rsp(ic, pdu);
+ break;
+ case ISCSI_OP_SCSI_TASK_MGT_RSP:
+ status = iscsi_rx_process_task_mgt_rsp(ic, pdu);
+ idm_pdu_complete(pdu, status);
+ break;
+ case ISCSI_OP_NOOP_IN:
+ status = iscsi_rx_process_nop(ic, pdu);
+ idm_pdu_complete(pdu, status);
+ break;
+ case ISCSI_OP_ASYNC_EVENT:
+ status = iscsi_rx_process_async_rsp(ic, pdu);
+ break;
+ case ISCSI_OP_TEXT_RSP:
+ status = iscsi_rx_process_text_rsp(ic, pdu);
+ idm_pdu_complete(pdu, status);
+ break;
+ default:
+ cmn_err(CE_WARN, "iscsi connection(%u) protocol error "
+ "- received misc unsupported opcode 0x%02x",
+ icp->conn_oid, ihp->opcode);
+ status = IDM_STATUS_PROTOCOL_ERROR;
+ break;
+ }
+ iscsi_process_rsp_status(isp, icp, status);
+}
+
+/*
* +--------------------------------------------------------------------+
* | Beginning of completion routines |
* +--------------------------------------------------------------------+
@@ -2973,12 +3033,10 @@
mutex_enter(&icmdp->cmd_mutex);
/*
* check if the associated r2t/abort has finished
- * yet, and make sure this command has no R2T
- * to handle. If not, don't complete this command.
+ * yet. If not, don't complete the command.
*/
if ((icmdp->cmd_un.scsi.r2t_icmdp == NULL) &&
- (icmdp->cmd_un.scsi.abort_icmdp == NULL) &&
- (icmdp->cmd_un.scsi.r2t_more == B_FALSE)) {
+ (icmdp->cmd_un.scsi.abort_icmdp == NULL)) {
mutex_exit(&icmdp->cmd_mutex);
(void) iscsi_dequeue_cmd(&isp->
sess_queue_completion.head,
@@ -2987,8 +3045,9 @@
--isp->sess_queue_completion.count;
iscsi_enqueue_cmd_head(&q.head,
&q.tail, icmdp);
- } else
+ } else {
mutex_exit(&icmdp->cmd_mutex);
+ }
icmdp = next_icmdp;
}
mutex_exit(&isp->sess_queue_completion.mutex);
@@ -3105,8 +3164,8 @@
iscsi_timeout_checks(iscsi_sess_t *isp)
{
clock_t now = ddi_get_lbolt();
+ iscsi_conn_t *icp;
iscsi_cmd_t *icmdp, *nicmdp;
- iscsi_conn_t *icp;
ASSERT(isp != NULL);
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_ioctl.c Tue Mar 24 17:50:49 2009 -0600
@@ -28,10 +28,10 @@
/*
* Framework interface routines for iSCSI
*/
-#include "iscsi.h" /* main header */
+#include "iscsi.h" /* main header */
+#include <sys/idm/idm_text.h> /* main header */
+#include <sys/iscsi_protocol.h> /* protocol structs */
#include <sys/scsi/adapters/iscsi_if.h> /* ioctl interfaces */
-/* protocol structs and defines */
-#include <sys/iscsi_protocol.h>
#include "persistent.h"
#include <sys/scsi/adapters/iscsi_door.h>
#include "iscsi_targetparam.h"
@@ -237,16 +237,13 @@
iscsi_sess_t *isp;
iscsi_conn_t *icp;
boolean_t rtn;
- struct sockaddr_in6 t_addr;
- socklen_t t_addrlen;
+ idm_conn_t *idm_conn;
/* Let's check the version. */
if (cp->cp_vers != ISCSI_INTERFACE_VERSION) {
return (B_FALSE);
}
- bzero(&t_addr, sizeof (struct sockaddr_in6));
- t_addrlen = sizeof (struct sockaddr_in6);
/* Let's find the session. */
rw_enter(&ihp->hba_sess_list_rwlock, RW_READER);
if (iscsi_sess_get(cp->cp_sess_oid, ihp, &isp) != 0) {
@@ -267,15 +264,35 @@
ASSERT(icp->conn_sig == ISCSI_SIG_CONN);
if (icp->conn_oid == cp->cp_oid) {
- iscsi_net->getsockname(icp->conn_socket,
- (struct sockaddr *)&t_addr, &t_addrlen);
- if (t_addrlen <= sizeof (cp->cp_local)) {
- bcopy(&t_addr, &cp->cp_local, t_addrlen);
+ struct sockaddr_storage *sal;
+ struct sockaddr_storage *sar;
+
+ idm_conn =
+ (idm_conn_t *)icp->conn_ic;
+
+ sal = &idm_conn->ic_laddr;
+ sar = &idm_conn->ic_raddr;
+
+ /* Local Address */
+ if (sal->ss_family == AF_INET) {
+ bcopy(&idm_conn->ic_laddr,
+ &cp->cp_local,
+ sizeof (struct sockaddr_in));
+ } else {
+ bcopy(&idm_conn->ic_laddr,
+ &cp->cp_local,
+ sizeof (struct sockaddr_in6));
}
- ksocket_getpeername((ksocket_t)(icp->conn_socket),
- (struct sockaddr *)&t_addr, &t_addrlen, CRED());
- if (t_addrlen <= sizeof (cp->cp_peer)) {
- bcopy(&t_addr, &cp->cp_peer, t_addrlen);
+
+ /* Peer Address */
+ if (sar->ss_family == AF_INET) {
+ bcopy(&idm_conn->ic_raddr,
+ &cp->cp_peer,
+ sizeof (struct sockaddr_in));
+ } else {
+ bcopy(&idm_conn->ic_raddr,
+ &cp->cp_peer,
+ sizeof (struct sockaddr_in6));
}
if (icp->conn_state == ISCSI_CONN_STATE_LOGGED_IN) {
@@ -345,10 +362,10 @@
/* start login */
mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, ISCSI_CONN_EVENT_T1);
+ status = iscsi_conn_online(icp);
mutex_exit(&icp->conn_state_mutex);
- if (icp->conn_state == ISCSI_CONN_STATE_LOGGED_IN) {
+ if (status == ISCSI_STATUS_SUCCESS) {
data_len = icp->conn_params.max_xmit_data_seg_len;
retry_sendtgts:
/* alloc/init buffer for SendTargets req/resp */
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c Tue Mar 24 17:50:49 2009 -0600
@@ -30,11 +30,12 @@
#include <sys/iscsi_protocol.h>
#include <sys/scsi/adapters/iscsi_door.h>
+boolean_t iscsi_login_logging = B_FALSE;
+
/* internal login protocol interfaces */
static iscsi_status_t iscsi_login(iscsi_conn_t *icp,
- char *buffer, size_t bufsize, uint8_t *status_class,
- uint8_t *status_detail);
-static int iscsi_add_text(iscsi_hdr_t *ihp, char *data,
+ uint8_t *status_class, uint8_t *status_detail);
+static int iscsi_add_text(idm_pdu_t *text_pdu,
int max_data_length, char *param, char *value);
static int iscsi_find_key_value(char *param, char *ihp, char *pdu_end,
char **value_start, char **value_end);
@@ -43,14 +44,16 @@
static iscsi_status_t iscsi_process_login_response(iscsi_conn_t *icp,
iscsi_login_rsp_hdr_t *ilrhp, char *data, int max_data_length);
static iscsi_status_t iscsi_make_login_pdu(iscsi_conn_t *icp,
- iscsi_hdr_t *text_pdu, char *data, int max_data_length);
+ idm_pdu_t *text_pdu, char *data, int max_data_length);
static iscsi_status_t iscsi_update_address(iscsi_conn_t *icp,
char *address);
static char *iscsi_login_failure_str(uchar_t status_class,
uchar_t status_detail);
static void iscsi_login_end(iscsi_conn_t *icp,
- iscsi_conn_event_t event, iscsi_task_t *itp);
+ iscsi_status_t status, iscsi_task_t *itp);
static iscsi_status_t iscsi_login_connect(iscsi_conn_t *icp);
+static void iscsi_login_disconnect(iscsi_conn_t *icp);
+static void iscsi_notice_key_values(iscsi_conn_t *icp);
#define ISCSI_LOGIN_RETRY_DELAY 5 /* seconds */
#define ISCSI_LOGIN_POLLING_DELAY 60 /* seconds */
@@ -72,10 +75,8 @@
iscsi_conn_t *icp;
iscsi_sess_t *isp;
iscsi_hba_t *ihp;
- char *buf;
unsigned char status_class;
unsigned char status_detail;
- int login_buf_size;
clock_t lbolt;
ASSERT(itp != NULL);
@@ -87,6 +88,12 @@
ASSERT(ihp != NULL);
login_start:
+ ASSERT((icp->conn_state == ISCSI_CONN_STATE_IN_LOGIN) ||
+ (icp->conn_state == ISCSI_CONN_STATE_FAILED) ||
+ (icp->conn_state == ISCSI_CONN_STATE_POLLING));
+
+ icp->conn_state_ffp = B_FALSE;
+
/* reset connection statsn */
icp->conn_expstatsn = 0;
icp->conn_laststatsn = 0;
@@ -97,7 +104,7 @@
/* sync up login and session parameters */
if (!ISCSI_SUCCESS(iscsi_conn_sync_params(icp))) {
/* unable to sync params. fail connection attempts */
- iscsi_login_end(icp, ISCSI_CONN_EVENT_T30, itp);
+ iscsi_login_end(icp, ISCSI_STATUS_LOGIN_FAILED, itp);
return (ISCSI_STATUS_LOGIN_FAILED);
}
@@ -107,7 +114,11 @@
delay(icp->conn_login_min - lbolt);
}
- /* Attempt to open TCP connection */
+ /*
+ * Attempt to open TCP connection, associated IDM connection will
+ * have a hold on it that must be released after the call to
+ * iscsi_login() below.
+ */
if (!ISCSI_SUCCESS(iscsi_login_connect(icp))) {
/* retry this failure */
goto login_retry;
@@ -117,19 +128,25 @@
* allocate response buffer with based on default max
* transfer size. This size might shift during login.
*/
- login_buf_size = icp->conn_params.max_xmit_data_seg_len;
- buf = kmem_zalloc(login_buf_size, KM_SLEEP);
+ icp->conn_login_max_data_length =
+ icp->conn_params.max_xmit_data_seg_len;
+ icp->conn_login_data = kmem_zalloc(icp->conn_login_max_data_length,
+ KM_SLEEP);
- /* Start protocol login */
- rval = iscsi_login(icp, buf, login_buf_size,
- &status_class, &status_detail);
+ /*
+ * Start protocol login, upon return we will be either logged in
+ * or disconnected
+ */
+ rval = iscsi_login(icp, &status_class, &status_detail);
/* done with buffer */
- kmem_free(buf, login_buf_size);
+ kmem_free(icp->conn_login_data, icp->conn_login_max_data_length);
+
+ /* Release connection hold */
+ idm_conn_rele(icp->conn_ic);
/* hard failure in login */
if (!ISCSI_SUCCESS(rval)) {
- iscsi_net->close(icp->conn_socket);
/*
* We should just give up retry if these failures are
* detected.
@@ -144,7 +161,7 @@
case ISCSI_STATUS_VERSION_MISMATCH:
case ISCSI_STATUS_NEGO_FAIL:
/* we don't want to retry this failure */
- iscsi_login_end(icp, ISCSI_CONN_EVENT_T30, itp);
+ iscsi_login_end(icp, ISCSI_STATUS_LOGIN_FAILED, itp);
return (ISCSI_STATUS_LOGIN_FAILED);
default:
/* retry this failure */
@@ -156,11 +173,10 @@
switch (status_class) {
case ISCSI_STATUS_CLASS_SUCCESS:
/* login was successful */
- iscsi_login_end(icp, ISCSI_CONN_EVENT_T5, itp);
+ iscsi_login_end(icp, ISCSI_STATUS_SUCCESS, itp);
return (ISCSI_STATUS_SUCCESS);
case ISCSI_STATUS_CLASS_REDIRECT:
/* Retry at the redirected address */
- iscsi_net->close(icp->conn_socket);
goto login_start;
case ISCSI_STATUS_CLASS_TARGET_ERR:
/* retry this failure */
@@ -168,8 +184,6 @@
"%s (0x%02x/0x%02x)", icp->conn_oid,
iscsi_login_failure_str(status_class, status_detail),
status_class, status_detail);
-
- iscsi_net->close(icp->conn_socket);
goto login_retry;
case ISCSI_STATUS_CLASS_INITIATOR_ERR:
default:
@@ -181,12 +195,11 @@
status_class, status_detail, isp->sess_name,
isp->sess_tpgt_conf);
- iscsi_net->close(icp->conn_socket);
-
/* we don't want to retry this failure */
- iscsi_login_end(icp, ISCSI_CONN_EVENT_T30, itp);
+ iscsi_login_end(icp, ISCSI_STATUS_LOGIN_FAILED, itp);
break;
}
+
return (ISCSI_STATUS_LOGIN_FAILED);
login_retry:
@@ -208,20 +221,20 @@
(void(*)())iscsi_login_start, itp, DDI_SLEEP) !=
DDI_SUCCESS) {
iscsi_login_end(icp,
- ISCSI_CONN_EVENT_T7, itp);
+ ISCSI_STATUS_LOGIN_TIMED_OUT, itp);
}
return (ISCSI_STATUS_SUCCESS);
}
} else {
/* Retries exceeded */
- iscsi_login_end(icp, ISCSI_CONN_EVENT_T7, itp);
+ iscsi_login_end(icp, ISCSI_STATUS_LOGIN_TIMED_OUT, itp);
}
+
return (ISCSI_STATUS_LOGIN_FAILED);
}
static void
-iscsi_login_end(iscsi_conn_t *icp, iscsi_conn_event_t event,
- iscsi_task_t *itp)
+iscsi_login_end(iscsi_conn_t *icp, iscsi_status_t status, iscsi_task_t *itp)
{
iscsi_sess_t *isp;
@@ -229,13 +242,85 @@
isp = icp->conn_sess;
ASSERT(isp != NULL);
- mutex_enter(&icp->conn_state_mutex);
- (void) iscsi_conn_state_machine(icp, event);
- mutex_exit(&icp->conn_state_mutex);
+ if (status == ISCSI_STATUS_SUCCESS) {
+ /* Inform IDM of the relevant negotiated values */
+ iscsi_notice_key_values(icp);
+
+ /* We are now logged in */
+ iscsi_conn_update_state(icp, ISCSI_CONN_STATE_LOGGED_IN);
+
+ /* startup TX thread */
+ (void) iscsi_thread_start(icp->conn_tx_thread);
+
+ /*
+ * Move login state machine to LOGIN_FFP. This will
+ * release the taskq thread handling the CN_FFP_ENABLED
+ * allowing the IDM connection state machine to resume
+ * processing events
+ */
+ iscsi_login_update_state(icp, LOGIN_FFP);
+
+ /* Notify the session that a connection is logged in */
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N1);
+ mutex_exit(&isp->sess_state_mutex);
+ } else {
+ /* If login failed reset nego tpgt */
+ isp->sess_tpgt_nego = ISCSI_DEFAULT_TPGT;
- /* If login failed reset nego tpgt */
- if (event != ISCSI_CONN_EVENT_T5) {
- isp->sess_tpgt_nego = ISCSI_DEFAULT_TPGT;
+ mutex_enter(&icp->conn_state_mutex);
+ switch (icp->conn_state) {
+ case ISCSI_CONN_STATE_IN_LOGIN:
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ mutex_exit(&icp->conn_state_mutex);
+ break;
+ case ISCSI_CONN_STATE_FAILED:
+ if (status == ISCSI_STATUS_LOGIN_FAILED) {
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ } else {
+ /* ISCSI_STATUS_LOGIN_TIMED_OUT */
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_POLLING);
+ }
+ mutex_exit(&icp->conn_state_mutex);
+
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp, ISCSI_SESS_EVENT_N6);
+ mutex_exit(&isp->sess_state_mutex);
+
+ if (status == ISCSI_STATUS_LOGIN_TIMED_OUT) {
+ iscsi_conn_retry(isp, icp);
+ }
+ break;
+ case ISCSI_CONN_STATE_POLLING:
+ if (status == ISCSI_STATUS_LOGIN_FAILED) {
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ mutex_exit(&icp->conn_state_mutex);
+
+ mutex_enter(&isp->sess_state_mutex);
+ iscsi_sess_state_machine(isp,
+ ISCSI_SESS_EVENT_N6);
+ mutex_exit(&isp->sess_state_mutex);
+ } else {
+ /* ISCSI_STATUS_LOGIN_TIMED_OUT */
+ if (isp->sess_type == ISCSI_SESS_TYPE_NORMAL) {
+ mutex_exit(&icp->conn_state_mutex);
+
+ iscsi_conn_retry(isp, icp);
+ } else {
+ iscsi_conn_update_state_locked(icp,
+ ISCSI_CONN_STATE_FREE);
+ mutex_exit(&icp->conn_state_mutex);
+ }
+ }
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
}
if (itp->t_blocking == B_FALSE) {
@@ -257,17 +342,22 @@
* allows the caller to decide whether or not to retry logins, so
* that we don't have any policy logic here.
*/
-static iscsi_status_t
-iscsi_login(iscsi_conn_t *icp, char *buffer, size_t bufsize,
- uint8_t *status_class, uint8_t *status_detail)
+iscsi_status_t
+iscsi_login(iscsi_conn_t *icp, uint8_t *status_class, uint8_t *status_detail)
{
iscsi_status_t rval = ISCSI_STATUS_INTERNAL_ERROR;
struct iscsi_sess *isp = NULL;
IscsiAuthClient *auth_client = NULL;
int max_data_length = 0;
- iscsi_hdr_t ihp;
- iscsi_login_rsp_hdr_t *ilrhp = (iscsi_login_rsp_hdr_t *)&ihp;
char *data = NULL;
+ idm_pdu_t *text_pdu;
+ char *buffer;
+ size_t bufsize;
+ iscsi_login_rsp_hdr_t *ilrhp;
+ clock_t response_timeout, timeout_result;
+
+ buffer = icp->conn_login_data;
+ bufsize = icp->conn_login_max_data_length;
ASSERT(icp != NULL);
ASSERT(buffer != NULL);
@@ -277,7 +367,7 @@
ASSERT(isp != NULL);
/*
- * prepare the connection
+ * prepare the connection, hold IDM connection until login completes
*/
icp->conn_current_stage = ISCSI_INITIAL_LOGIN_STAGE;
icp->conn_partial_response = 0;
@@ -298,6 +388,8 @@
cmn_err(CE_WARN, "iscsi connection(%u) login failed - "
"unable to initialize authentication",
icp->conn_oid);
+ iscsi_login_disconnect(icp);
+ iscsi_login_update_state(icp, LOGIN_DONE);
return (ISCSI_STATUS_INTERNAL_ERROR);
}
@@ -353,62 +445,76 @@
max_data_length = bufsize;
rval = ISCSI_STATUS_INTERNAL_ERROR;
+ text_pdu = idm_pdu_alloc(sizeof (iscsi_hdr_t), 0);
+ idm_pdu_init(text_pdu, icp->conn_ic, NULL, NULL);
+
/*
* fill in the PDU header and text data based on the
* login stage that we're in
*/
- rval = iscsi_make_login_pdu(icp, &ihp, data, max_data_length);
+ rval = iscsi_make_login_pdu(icp, text_pdu, data,
+ max_data_length);
if (!ISCSI_SUCCESS(rval)) {
cmn_err(CE_WARN, "iscsi connection(%u) login failed - "
"unable to make login pdu", icp->conn_oid);
goto iscsi_login_done;
}
- /* send a PDU to the target */
- rval = iscsi_net->sendpdu(icp->conn_socket, &ihp, data, 0);
- if (!ISCSI_SUCCESS(rval)) {
- cmn_err(CE_WARN, "iscsi connection(%u) login failed - "
- "failed to transfer login", icp->conn_oid);
+ mutex_enter(&icp->conn_login_mutex);
+ /*
+ * Make sure we are still in LOGIN_READY or LOGIN_RX
+ * state before switching to LOGIN_TX. It's possible
+ * for a connection failure to move us to LOGIN_ERROR
+ * before we get to this point.
+ */
+ if (((icp->conn_login_state != LOGIN_READY) &&
+ (icp->conn_login_state != LOGIN_RX)) ||
+ !icp->conn_state_idm_connected) {
+ /* Error occurred */
+ mutex_exit(&icp->conn_login_mutex);
+ rval = (ISCSI_STATUS_INTERNAL_ERROR);
goto iscsi_login_done;
}
- /* read the target's response into the same buffer */
- bzero(buffer, bufsize);
- rval = iscsi_net->recvhdr(icp->conn_socket, &ihp,
- sizeof (ihp), ISCSI_RX_TIMEOUT_VALUE, 0);
- if (!ISCSI_SUCCESS(rval)) {
- if (rval == ISCSI_STATUS_RX_TIMEOUT) {
-#define STRING_FTRLRT "failed to receive login response - timeout"
- cmn_err(CE_WARN,
- "iscsi connection(%u) login failed - "
- STRING_FTRLRT,
- icp->conn_oid);
-#undef STRING_FTRLRT
- } else {
- cmn_err(CE_WARN,
- "iscsi connection(%u) login failed - "
- "failed to receive login response",
- icp->conn_oid);
- }
+ iscsi_login_update_state_locked(icp, LOGIN_TX);
+ icp->conn_login_data = data;
+ icp->conn_login_max_data_length = max_data_length;
+
+ /*
+ * send a PDU to the target. This is asynchronous but
+ * we don't have any particular need for a TX completion
+ * notification since we are going to block waiting for the
+ * receive.
+ */
+ response_timeout = ddi_get_lbolt() +
+ SEC_TO_TICK(ISCSI_RX_TIMEOUT_VALUE);
+ idm_pdu_tx(text_pdu);
+
+ /*
+ * Wait for login failure indication or login RX.
+ * Handler for login response PDU will copy any data into
+ * the buffer pointed to by icp->conn_login_data
+ */
+ while (icp->conn_login_state == LOGIN_TX) {
+ timeout_result = cv_timedwait(&icp->conn_login_cv,
+ &icp->conn_login_mutex, response_timeout);
+ if (timeout_result == -1)
+ break;
+ }
+
+ if (icp->conn_login_state != LOGIN_RX) {
+ mutex_exit(&icp->conn_login_mutex);
+ rval = (ISCSI_STATUS_INTERNAL_ERROR);
goto iscsi_login_done;
}
- isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
-
- rval = iscsi_net->recvdata(icp->conn_socket, &ihp,
- data, max_data_length, ISCSI_RX_TIMEOUT_VALUE, 0);
- if (!ISCSI_SUCCESS(rval)) {
- cmn_err(CE_WARN, "iscsi connection(%u) login failed - "
- "failed to receive login response",
- icp->conn_oid);
- goto iscsi_login_done;
- }
- isp->sess_rx_lbolt = icp->conn_rx_lbolt = ddi_get_lbolt();
+ mutex_exit(&icp->conn_login_mutex);
/* check the PDU response type */
- if (ihp.opcode != ISCSI_OP_LOGIN_RSP) {
+ ilrhp = (iscsi_login_rsp_hdr_t *)&icp->conn_login_resp_hdr;
+ if (ilrhp->opcode != ISCSI_OP_LOGIN_RSP) {
cmn_err(CE_WARN, "iscsi connection(%u) login failed - "
"received invalid login response (0x%02x)",
- icp->conn_oid, ihp.opcode);
+ icp->conn_oid, ilrhp->opcode);
rval = (ISCSI_STATUS_PROTOCOL_ERROR);
goto iscsi_login_done;
}
@@ -431,8 +537,8 @@
* sending PDUs
*/
rval = iscsi_process_login_response(icp,
- ilrhp, data, max_data_length);
-
+ ilrhp, (char *)icp->conn_login_data,
+ icp->conn_login_max_data_length);
/* pass back whatever error we discovered */
if (!ISCSI_SUCCESS(rval)) {
goto iscsi_login_done;
@@ -445,8 +551,9 @@
* TargetAddress of the redirect, but we don't
* care about the return code.
*/
- (void) iscsi_process_login_response(icp, ilrhp,
- data, max_data_length);
+ (void) iscsi_process_login_response(icp,
+ ilrhp, (char *)icp->conn_login_data,
+ icp->conn_login_max_data_length);
rval = ISCSI_STATUS_SUCCESS;
goto iscsi_login_done;
case ISCSI_STATUS_CLASS_INITIATOR_ERR:
@@ -484,6 +591,20 @@
rval = ISCSI_STATUS_INTERNAL_ERROR;
}
}
+
+ if (ISCSI_SUCCESS(rval) &&
+ (*status_class == ISCSI_STATUS_CLASS_SUCCESS)) {
+ mutex_enter(&icp->conn_state_mutex);
+ while (!icp->conn_state_ffp)
+ cv_wait(&icp->conn_state_change,
+ &icp->conn_state_mutex);
+ mutex_exit(&icp->conn_state_mutex);
+ } else {
+ iscsi_login_disconnect(icp);
+ }
+
+ iscsi_login_update_state(icp, LOGIN_DONE);
+
return (rval);
}
@@ -493,20 +614,21 @@
*
*/
static iscsi_status_t
-iscsi_make_login_pdu(iscsi_conn_t *icp, iscsi_hdr_t *ihp,
+iscsi_make_login_pdu(iscsi_conn_t *icp, idm_pdu_t *text_pdu,
char *data, int max_data_length)
{
struct iscsi_sess *isp = NULL;
int transit = 0;
- iscsi_login_hdr_t *ilhp = (iscsi_login_hdr_t *)ihp;
+ iscsi_hdr_t *ihp = text_pdu->isp_hdr;
+ iscsi_login_hdr_t *ilhp =
+ (iscsi_login_hdr_t *)text_pdu->isp_hdr;
IscsiAuthClient *auth_client = NULL;
int keytype = 0;
int rc = 0;
char value[iscsiAuthStringMaxLength];
ASSERT(icp != NULL);
- ASSERT(ihp != NULL);
- ASSERT(data != NULL);
+ ASSERT(text_pdu != NULL);
isp = icp->conn_sess;
ASSERT(isp != NULL);
@@ -523,6 +645,12 @@
bcopy(&isp->sess_isid[0], &ilhp->isid[0], sizeof (isp->sess_isid));
ilhp->tsid = 0;
+ /*
+ * Set data buffer pointer. The calls to iscsi_add_text will update the
+ * data length.
+ */
+ text_pdu->isp_data = (uint8_t *)data;
+
/* don't increment on immediate */
ilhp->cmdsn = htonl(isp->sess_cmdsn);
@@ -541,7 +669,7 @@
if (icp->conn_current_stage == ISCSI_INITIAL_LOGIN_STAGE) {
if ((isp->sess_hba->hba_name) &&
(isp->sess_hba->hba_name[0])) {
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"InitiatorName",
(char *)isp->sess_hba->hba_name)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -555,7 +683,7 @@
if ((isp->sess_hba->hba_alias) &&
(isp->sess_hba->hba_alias[0])) {
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"InitiatorAlias",
(char *)isp->sess_hba->hba_alias)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -564,18 +692,18 @@
if (isp->sess_type == ISCSI_SESS_TYPE_NORMAL) {
if (isp->sess_name[0] != '\0') {
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"TargetName", (char *)isp->sess_name)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
}
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"SessionType", "Normal")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
} else if (isp->sess_type == ISCSI_SESS_TYPE_DISCOVERY) {
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"SessionType", "Discovery")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
@@ -620,20 +748,20 @@
*/
switch (icp->conn_params.header_digest) {
case ISCSI_DIGEST_NONE:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "HeaderDigest", "None")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
break;
case ISCSI_DIGEST_CRC32C:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length,
"HeaderDigest", "CRC32C")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
break;
case ISCSI_DIGEST_CRC32C_NONE:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "HeaderDigest",
"CRC32C,None")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -641,7 +769,7 @@
break;
default:
case ISCSI_DIGEST_NONE_CRC32C:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "HeaderDigest",
"None,CRC32C")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -651,19 +779,19 @@
switch (icp->conn_params.data_digest) {
case ISCSI_DIGEST_NONE:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataDigest", "None")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
break;
case ISCSI_DIGEST_CRC32C:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataDigest", "CRC32C")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
break;
case ISCSI_DIGEST_CRC32C_NONE:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataDigest",
"CRC32C,None")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -671,7 +799,7 @@
break;
default:
case ISCSI_DIGEST_NONE_CRC32C:
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataDigest",
"None,CRC32C")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -681,39 +809,39 @@
(void) sprintf(value, "%d",
icp->conn_params.max_recv_data_seg_len);
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"MaxRecvDataSegmentLength", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.default_time_to_wait);
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DefaultTime2Wait", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.default_time_to_retain);
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DefaultTime2Retain", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.error_recovery_level);
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "ErrorRecoveryLevel", "0")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "IFMarker",
icp->conn_params.ifmarker ? "Yes" : "No")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "OFMarker",
icp->conn_params.ofmarker ? "Yes" : "No")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
@@ -725,14 +853,14 @@
*/
if (isp->sess_type != ISCSI_SESS_TYPE_DISCOVERY) {
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "InitialR2T",
icp->conn_params.initial_r2t ?
"Yes" : "No")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "ImmediateData",
icp->conn_params.immediate_data ?
"Yes" : "No")) {
@@ -741,40 +869,40 @@
(void) sprintf(value, "%d",
icp->conn_params.max_burst_length);
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "MaxBurstLength", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.first_burst_length);
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"FirstBurstLength", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.max_outstanding_r2t);
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"MaxOutstandingR2T", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
(void) sprintf(value, "%d",
icp->conn_params.max_connections);
- if (!iscsi_add_text(ihp, data, max_data_length,
+ if (!iscsi_add_text(text_pdu, max_data_length,
"MaxConnections", value)) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataPDUInOrder",
icp->conn_params.data_pdu_in_order ?
"Yes" : "No")) {
return (ISCSI_STATUS_INTERNAL_ERROR);
}
- if (!iscsi_add_text(ihp, data,
+ if (!iscsi_add_text(text_pdu,
max_data_length, "DataSequenceInOrder",
icp->conn_params.data_sequence_in_order ?
"Yes" : "No")) {
@@ -811,7 +939,7 @@
int present = 0;
char *key = (char *)iscsiAuthClientGetKeyName(keytype);
int key_length = key ? strlen(key) : 0;
- int pdu_length = ntoh24(ihp->dlength);
+ int pdu_length = text_pdu->isp_datalen;
char *auth_value = data + pdu_length + key_length + 1;
unsigned int max_length = max_data_length -
(pdu_length + key_length + 1);
@@ -837,6 +965,7 @@
* include the value and trailing NULL
*/
pdu_length += strlen(auth_value) + 1;
+ text_pdu->isp_datalen = pdu_length;
hton24(ihp->dlength, pdu_length);
}
}
@@ -1638,7 +1767,7 @@
* terminated strings
*/
int
-iscsi_add_text(iscsi_hdr_t *ihp, char *data, int max_data_length,
+iscsi_add_text(idm_pdu_t *text_pdu, int max_data_length,
char *param, char *value)
{
int param_len = 0;
@@ -1648,8 +1777,7 @@
char *text = NULL;
char *end = NULL;
- ASSERT(ihp != NULL);
- ASSERT(data != NULL);
+ ASSERT(text_pdu != NULL);
ASSERT(param != NULL);
ASSERT(value != NULL);
@@ -1657,9 +1785,9 @@
value_len = strlen(value);
/* param, separator, value, and trailing NULL */
length = param_len + 1 + value_len + 1;
- pdu_length = ntoh24(ihp->dlength);
- text = data + pdu_length;
- end = data + max_data_length;
+ pdu_length = text_pdu->isp_datalen;
+ text = (char *)text_pdu->isp_data + pdu_length;
+ end = (char *)text_pdu->isp_data + max_data_length;
pdu_length += length;
if (text + length >= end) {
@@ -1681,7 +1809,8 @@
*text++ = '\0';
/* update the length in the PDU header */
- hton24(ihp->dlength, pdu_length);
+ text_pdu->isp_datalen = pdu_length;
+ hton24(text_pdu->isp_hdr->dlength, pdu_length);
return (1);
}
@@ -1832,6 +1961,31 @@
return (ISCSI_STATUS_SUCCESS);
}
+void
+iscsi_login_update_state(iscsi_conn_t *icp, iscsi_login_state_t next_state)
+{
+ mutex_enter(&icp->conn_login_mutex);
+ (void) iscsi_login_update_state_locked(icp, next_state);
+ mutex_exit(&icp->conn_login_mutex);
+}
+
+void
+iscsi_login_update_state_locked(iscsi_conn_t *icp,
+ iscsi_login_state_t next_state)
+{
+ ASSERT(mutex_owned(&icp->conn_login_mutex));
+ next_state = (next_state > LOGIN_MAX) ? LOGIN_MAX : next_state;
+ idm_sm_audit_state_change(&icp->conn_state_audit,
+ SAS_ISCSI_LOGIN, icp->conn_login_state, next_state);
+
+ ISCSI_LOGIN_LOG(CE_NOTE, "iscsi_login_update_state conn %p %d -> %d",
+ (void *)icp, icp->conn_login_state, next_state);
+
+ icp->conn_login_state = next_state;
+ cv_broadcast(&icp->conn_login_cv);
+}
+
+
/*
* iscsi_null_callback - This callback may be used under certain
@@ -1937,9 +2091,8 @@
iscsi_hba_t *ihp;
iscsi_sess_t *isp;
struct sockaddr *addr;
- struct sockaddr_in6 t_addr;
- struct sonode *so = NULL;
- socklen_t t_addrlen;
+ idm_conn_req_t cr;
+ idm_status_t rval;
ASSERT(icp != NULL);
isp = icp->conn_sess;
@@ -1948,49 +2101,150 @@
ASSERT(ihp != NULL);
addr = &icp->conn_curr_addr.sin;
- t_addrlen = sizeof (struct sockaddr_in6);
- bzero(&t_addr, sizeof (struct sockaddr_in6));
- so = iscsi_net->socket(addr->sa_family, SOCK_STREAM, 0);
- if (so == NULL) {
- cmn_err(CE_WARN, "iscsi connection(%u) unable "
- "to acquire socket resources", icp->conn_oid);
- return (ISCSI_STATUS_INTERNAL_ERROR);
- }
-
- /* bind if enabled */
- if (icp->conn_bound == B_TRUE) {
- /* bind socket */
- if (iscsi_net->bind(so, &icp->conn_bound_addr.sin,
- SIZEOF_SOCKADDR(addr), 0, 0)) {
- cmn_err(CE_NOTE, "iscsi connection(%u) - "
- "bind failed\n", icp->conn_oid);
- }
- }
-
/* Make sure that scope_id is zero if it is an IPv6 address */
if (addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *)addr)->sin6_scope_id = 0;
}
- /* connect socket to target portal (ip,port) */
- if (!ISCSI_SUCCESS(iscsi_net->connect(so, addr,
- SIZEOF_SOCKADDR(addr), 0, 0))) {
+ /* delay the connect process if required */
+ lbolt = ddi_get_lbolt();
+ if (lbolt < icp->conn_login_min) {
+ delay(icp->conn_login_min - lbolt);
+ }
+
+ /* Create IDM connection context */
+ cr.cr_domain = addr->sa_family;
+ cr.cr_type = SOCK_STREAM;
+ cr.cr_protocol = 0;
+ cr.cr_bound = icp->conn_bound;
+ cr.cr_li = icp->conn_sess->sess_hba->hba_li;
+ cr.icr_conn_ops.icb_rx_misc = &iscsi_rx_misc_pdu;
+ cr.icr_conn_ops.icb_rx_error = &iscsi_rx_error_pdu;
+ cr.icr_conn_ops.icb_rx_scsi_rsp = &iscsi_rx_scsi_rsp;
+ cr.icr_conn_ops.icb_client_notify = &iscsi_client_notify;
+ cr.icr_conn_ops.icb_build_hdr = &iscsi_build_hdr;
+ cr.icr_conn_ops.icb_task_aborted = &iscsi_task_aborted;
+ bcopy(addr, &cr.cr_ini_dst_addr,
+ sizeof (cr.cr_ini_dst_addr));
+ bcopy(&icp->conn_bound_addr, &cr.cr_bound_addr,
+ sizeof (cr.cr_bound_addr));
+ /*
+ * Allocate IDM connection context
+ */
+ rval = idm_ini_conn_create(&cr, &icp->conn_ic);
+ if (rval != IDM_STATUS_SUCCESS) {
+ return (ISCSI_STATUS_LOGIN_FAILED);
+ }
+
+ icp->conn_ic->ic_handle = icp;
+
+ /*
+ * About to initiate connect, reset login state.
+ */
+ iscsi_login_update_state(icp, LOGIN_START);
+
+ /*
+ * Make sure the connection doesn't go away until we are done with it.
+ * This hold will prevent us from receiving a CN_CONNECT_DESTROY
+ * notification on this connection until we are ready.
+ */
+ idm_conn_hold(icp->conn_ic);
+
+ /*
+ * Attempt connection. Upon return we will either be ready to
+ * login or disconnected. If idm_ini_conn_connect fails we
+ * will eventually receive a CN_CONNECT_DESTROY at which point
+ * we will destroy the connection allocated above (so there
+ * is no need to explicitly free it here).
+ */
+ rval = idm_ini_conn_connect(icp->conn_ic);
+
+ if (rval != IDM_STATUS_SUCCESS) {
cmn_err(CE_NOTE, "iscsi connection(%u) unable to "
"connect to target %s", icp->conn_oid,
icp->conn_sess->sess_name);
-
- /* ---- 2 indicates both cantsend and cantrecv ---- */
- iscsi_net->shutdown(so, 2);
- return (ISCSI_STATUS_INTERNAL_ERROR);
+ idm_conn_rele(icp->conn_ic);
}
- icp->conn_socket = so;
- if (iscsi_net->getsockname(icp->conn_socket,
- (struct sockaddr *)&t_addr, &t_addrlen) != 0) {
- cmn_err(CE_NOTE, "iscsi connection(%u) failed to get "
- "socket information", icp->conn_oid);
+ return (rval == IDM_STATUS_SUCCESS ?
+ ISCSI_STATUS_SUCCESS : ISCSI_STATUS_INTERNAL_ERROR);
+}
+
+/*
+ * iscsi_login_disconnect
+ */
+static void
+iscsi_login_disconnect(iscsi_conn_t *icp)
+{
+ /* Tell IDM to disconnect if we are not already disconnected */
+ idm_ini_conn_disconnect_sync(icp->conn_ic);
+
+ /*
+ * The function above may return before the CN_CONNECT_LOST
+ * notification. Wait for it.
+ */
+ mutex_enter(&icp->conn_state_mutex);
+ while (icp->conn_state_idm_connected)
+ cv_wait(&icp->conn_state_change,
+ &icp->conn_state_mutex);
+ mutex_exit(&icp->conn_state_mutex);
+}
+
+/*
+ * iscsi_notice_key_values - Create an nvlist containing the values
+ * that have been negotiated for this connection and pass them down to
+ * IDM so it can pick up any values that are important.
+ */
+static void
+iscsi_notice_key_values(iscsi_conn_t *icp)
+{
+ nvlist_t *neg_nvl;
+ int rc;
+
+ rc = nvlist_alloc(&neg_nvl, NV_UNIQUE_NAME, KM_SLEEP);
+ ASSERT(rc == 0);
+
+ /* Only crc32c is supported so the digest logic is simple */
+ if (icp->conn_params.header_digest) {
+ rc = nvlist_add_string(neg_nvl, "HeaderDigest", "crc32c");
+ } else {
+ rc = nvlist_add_string(neg_nvl, "HeaderDigest", "none");
}
+ ASSERT(rc == 0);
- return (ISCSI_STATUS_SUCCESS);
+ if (icp->conn_params.data_digest) {
+ rc = nvlist_add_string(neg_nvl, "DataDigest", "crc32c");
+ } else {
+ rc = nvlist_add_string(neg_nvl, "DataDigest", "none");
+ }
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "MaxRecvDataSegmentLength",
+ (uint64_t)icp->conn_params.max_recv_data_seg_len);
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "MaxBurstLength",
+ (uint64_t)icp->conn_params.max_burst_length);
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "MaxOutstandingR2T",
+ (uint64_t)icp->conn_params.max_outstanding_r2t);
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "ErrorRecoveryLevel",
+ (uint64_t)icp->conn_params.error_recovery_level);
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "DefaultTime2Wait",
+ (uint64_t)icp->conn_params.default_time_to_wait);
+ ASSERT(rc == 0);
+
+ rc = nvlist_add_uint64(neg_nvl, "DefaultTime2Retain",
+ (uint64_t)icp->conn_params.default_time_to_retain);
+ ASSERT(rc == 0);
+
+ /* Pass the list to IDM to examine, then free it */
+ idm_notice_key_values(icp->conn_ic, neg_nvl);
+ nvlist_free(neg_nvl);
}
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c Tue Mar 24 17:50:49 2009 -0600
@@ -153,12 +153,6 @@
extern ib_boot_prop_t *iscsiboot_prop;
/* prototypes */
-
-/* for iSCSI boot */
-static int net_up = 0;
-static iscsi_status_t iscsi_net_interface();
-/* boot prototypes end */
-
static void * iscsi_net_socket(int domain, int type, int protocol);
static int iscsi_net_bind(void *socket, struct sockaddr *
name, int name_len, int backlog, int flags);
@@ -278,11 +272,6 @@
ksocket_t socket;
int err = 0;
- if (!modrootloaded && !net_up && iscsiboot_prop) {
- if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS)
- net_up = 1;
- }
-
err = ksocket_socket(&socket, domain, type, protocol, KSOCKET_SLEEP,
CRED());
if (!err)
@@ -860,7 +849,7 @@
return (ISCSI_STATUS_SUCCESS);
}
-static iscsi_status_t
+iscsi_status_t
iscsi_net_interface()
{
struct in_addr braddr;
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_queue.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_queue.c Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
* Copyright 2000 by Cisco Systems, Inc. All rights reserved.
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* iSCSI Software Initiator
@@ -222,6 +222,51 @@
}
/*
+ * iscsi_enqueue_idm_aborting_cmd - used to add a command to the queue
+ * representing commands waiting for a callback from IDM for aborting
+ *
+ * Not sorted
+ */
+void
+iscsi_enqueue_idm_aborting_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp)
+{
+ iscsi_sess_t *isp = NULL;
+
+ ASSERT(icp != NULL);
+ ASSERT(icmdp != NULL);
+ isp = icp->conn_sess;
+ ASSERT(isp != NULL);
+ ASSERT(icmdp->cmd_type == ISCSI_CMD_TYPE_SCSI);
+ ASSERT(mutex_owned(&icp->conn_queue_idm_aborting.mutex));
+
+ icmdp->cmd_state = ISCSI_CMD_STATE_IDM_ABORTING;
+ icmdp->cmd_lbolt_idm_aborting = ddi_get_lbolt();
+ iscsi_enqueue_cmd_tail(&icp->conn_queue_idm_aborting.head,
+ &icp->conn_queue_idm_aborting.tail, icmdp);
+ icp->conn_queue_idm_aborting.count++;
+}
+
+/*
+ * iscsi_dequeue_idm_aborting_cmd - used to remove a command from the queue
+ * representing commands waiting for a callback from IDM for aborting.
+ */
+void
+iscsi_dequeue_idm_aborting_cmd(iscsi_conn_t *icp, iscsi_cmd_t *icmdp)
+{
+ iscsi_sess_t *isp = NULL;
+
+ ASSERT(icp != NULL);
+ ASSERT(icmdp != NULL);
+ isp = icp->conn_sess;
+ ASSERT(isp != NULL);
+ ASSERT(mutex_owned(&icp->conn_queue_idm_aborting.mutex));
+
+ (void) iscsi_dequeue_cmd(&icp->conn_queue_idm_aborting.head,
+ &icp->conn_queue_idm_aborting.tail, icmdp);
+ icp->conn_queue_idm_aborting.count--;
+}
+
+/*
* iscsi_enqueue_completed_cmd - used to add a command in completion queue
*/
void
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_sess.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_sess.c Tue Mar 24 17:50:49 2009 -0600
@@ -33,6 +33,7 @@
#define ISCSI_SESS_ENUM_TIMEOUT_DEFAULT 60
#define SCSI_INQUIRY_PQUAL_MASK 0xE0
+boolean_t iscsi_sess_logging = B_FALSE;
/*
* used to store report lun information found
*
@@ -214,6 +215,7 @@
isp->sess_last_err = NoError;
isp->sess_tsid = 0;
isp->sess_type = type;
+ idm_sm_audit_init(&isp->sess_state_audit);
/* copy default driver login parameters */
bcopy(&ihp->hba_params, &isp->sess_params,
@@ -398,9 +400,7 @@
* in our list until end of the list.
*/
while (icp != NULL) {
- if (iscsi_conn_state_machine(
- icp, ISCSI_CONN_EVENT_T1) ==
- ISCSI_STATUS_SUCCESS) {
+ if (iscsi_conn_online(icp) == ISCSI_STATUS_SUCCESS) {
mutex_exit(&icp->conn_state_mutex);
break;
} else {
@@ -491,6 +491,7 @@
}
/* The next step is to logout of the connections. */
+ rw_enter(&isp->sess_conn_list_rwlock, RW_WRITER);
icp = isp->sess_conn_list;
while (icp != NULL) {
rval = iscsi_conn_offline(icp);
@@ -503,13 +504,15 @@
return (rval);
}
}
+ rw_exit(&isp->sess_conn_list_rwlock);
/*
* At this point all connections should be in
* a FREE state which will have pushed the session
* to a FREE state.
*/
- ASSERT(isp->sess_state == ISCSI_SESS_STATE_FREE);
+ ASSERT(isp->sess_state == ISCSI_SESS_STATE_FREE ||
+ isp->sess_state == ISCSI_SESS_STATE_FAILED);
/* Stop watchdog before destroying connections */
if (isp->sess_wd_thread) {
@@ -794,6 +797,31 @@
return (B_TRUE);
}
+/*
+ * iscsi_sess_reserve_scsi_itt - Reserve an ITT by allocating an IDM task
+ */
+iscsi_status_t
+iscsi_sess_reserve_scsi_itt(iscsi_cmd_t *icmdp)
+{
+ idm_task_t *itp;
+ iscsi_conn_t *icp = icmdp->cmd_conn;
+ itp = idm_task_alloc(icp->conn_ic);
+ if (itp == NULL)
+ return (ISCSI_STATUS_INTERNAL_ERROR);
+ itp->idt_private = icmdp;
+ icmdp->cmd_itp = itp;
+ icmdp->cmd_itt = itp->idt_tt;
+ return (ISCSI_STATUS_SUCCESS);
+}
+
+/*
+ * iscsi_sess_release_scsi_itt - Used to release ITT hash slot
+ */
+void
+iscsi_sess_release_scsi_itt(iscsi_cmd_t *icmdp)
+{
+ idm_task_free(icmdp->cmd_itp);
+}
/*
* iscsi_sess_reserve_itt - Used to reserve an ITT hash slot
@@ -807,6 +835,12 @@
}
/*
+ * Keep itt values out of the range used by IDM
+ */
+ if (isp->sess_itt < IDM_TASKIDS_MAX)
+ isp->sess_itt = IDM_TASKIDS_MAX;
+
+ /*
* Find the next available slot. Normally its the
* slot pointed to by the session's sess_itt value.
* If this is not true the table has become fragmented.
@@ -821,7 +855,7 @@
/* reserve slot and update counters */
icmdp->cmd_itt = isp->sess_itt;
- isp->sess_cmd_table[icmdp->cmd_itt %
+ isp->sess_cmd_table[isp->sess_itt %
ISCSI_CMD_TABLE_SIZE] = icmdp;
isp->sess_cmd_table_count++;
isp->sess_itt++;
@@ -947,9 +981,16 @@
char *, iscsi_sess_state_str(isp->sess_state),
char *, iscsi_sess_event_str(event));
+ /* Audit event */
+ idm_sm_audit_event(&isp->sess_state_audit,
+ SAS_ISCSI_SESS, isp->sess_state, event, NULL);
+
isp->sess_prev_state = isp->sess_state;
isp->sess_state_lbolt = ddi_get_lbolt();
+ ISCSI_SESS_LOG(CE_NOTE,
+ "DEBUG: sess_state: isp: %p state: %d event: %d",
+ (void *)isp, isp->sess_state, event);
switch (isp->sess_state) {
case ISCSI_SESS_STATE_FREE:
iscsi_sess_state_free(isp, event);
@@ -969,6 +1010,12 @@
default:
ASSERT(FALSE);
}
+
+ /* Audit state change */
+ if (isp->sess_prev_state != isp->sess_state) {
+ idm_sm_audit_state_change(&isp->sess_state_audit,
+ SAS_ISCSI_SESS, isp->sess_prev_state, isp->sess_state);
+ }
}
@@ -1755,13 +1802,13 @@
for (ilp = isp->sess_lun_list; ilp; ilp = ilp_next) {
ilp_next = ilp->lun_next;
- for (lun_count = lun_start;
- lun_count < lun_total; lun_count++) {
- /*
- * if the first lun in saved_replun_ptr buffer has already
- * been found we can move on and do not have to check this lun
- * in the future
- */
+ for (lun_count = lun_start; lun_count < lun_total;
+ lun_count++) {
+ /*
+ * if the first lun in saved_replun_ptr buffer has
+ * already been found we can move on and do not
+ * have to check this lun in the future
+ */
if (lun_count == lun_start &&
saved_replun_ptr[lun_start].lun_found) {
lun_start++;
@@ -1812,10 +1859,10 @@
saved_replun_ptr[lun_count].lun_valid = B_TRUE;
saved_replun_ptr[lun_count].lun_num = lun_num;
if (ilp->lun_num == lun_num) {
- /*
- * lun is found in the SCSI Report Lun buffer
- * make sure the lun is in the ONLINE state
- */
+ /*
+ * lun is found in the SCSI Report Lun buffer
+ * make sure the lun is in the ONLINE state
+ */
saved_replun_ptr[lun_count].lun_found = B_TRUE;
if ((ilp->lun_state &
ISCSI_LUN_STATE_OFFLINE) ||
@@ -1837,10 +1884,10 @@
}
if (lun_count == lun_total) {
- /*
- * this lun we found in the sess->lun_list does not exist
- * anymore, need to offline this lun
- */
+ /*
+ * this lun we found in the sess->lun_list does
+ * not exist anymore, need to offline this lun
+ */
DTRACE_PROBE2(sess_reportluns_lun_no_longer_exists,
int, ilp->lun_num, int, ilp->lun_state);
@@ -1866,10 +1913,11 @@
continue;
}
} else {
- /*
- * lun information is in the saved_replun buffer
- * if this lun has been found already, then we can move on
- */
+ /*
+ * lun information is in the saved_replun buffer
+ * if this lun has been found already,
+ * then we can move on
+ */
if (saved_replun_ptr[lun_count].lun_found == B_TRUE) {
continue;
}
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_stats.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_stats.c Tue Mar 24 17:50:49 2009 -0600
@@ -19,12 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* iSCSI Software Initiator
*/
+#define ISCSI_ICS_NAMES
#include "iscsi.h" /* main header */
kstat_item_t kstat_items_hba[KN_HBA_IDX_MAX] = {
@@ -279,8 +280,8 @@
/* String indicating the state of the session */
ptr = iscsi_sess_state_str(isp->sess_state);
len = strlen(ptr);
- if (len > sizeof (iss->state_str)) {
- len = sizeof (iss->state_str);
+ if (len > (sizeof (iss->state_str) - 1)) {
+ len = sizeof (iss->state_str) - 1;
}
bzero(iss->state_str, sizeof (iss->state_str));
bcopy(ptr, iss->state_str, len);
@@ -410,17 +411,15 @@
{
iscsi_conn_t *icp = (iscsi_conn_t *)ks->ks_private;
iscsi_conn_stats_t *ics = &icp->stats.ks_data;
- char *ptr;
int len;
if (rw == KSTAT_READ) {
- ptr = iscsi_conn_state_str(icp->conn_state);
- len = strlen(ptr);
- if (len > sizeof (ics->state_str)) {
- len = sizeof (ics->state_str);
+ len = strlen(iscsi_ics_name[icp->conn_state]);
+ if (len > (sizeof (ics->state_str) - 1)) {
+ len = sizeof (ics->state_str) - 1;
}
bzero(ics->state_str, sizeof (ics->state_str));
- bcopy(ptr, ics->state_str, len);
+ bcopy(iscsi_ics_name[icp->conn_state], ics->state_str, len);
kstat_named_setstr(&ics->kn[KN_CONN_IDX_STATE],
(const char *)ics->state_str);
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsid.c Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsid.c Tue Mar 24 17:50:49 2009 -0600
@@ -76,6 +76,7 @@
extern int modrootloaded;
int iscsi_configroot_retry = 20;
static boolean_t iscsi_configroot_printed = FALSE;
+static int iscsi_net_up = 0;
extern ib_boot_prop_t *iscsiboot_prop;
/*
@@ -706,6 +707,15 @@
" iSCSI boot session...");
iscsi_configroot_printed = B_TRUE;
}
+ if (iscsi_net_up == 0) {
+ if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS) {
+ iscsi_net_up = 1;
+ } else {
+ cmn_err(CE_WARN, "Failed to configure interface"
+ " for iSCSI boot session");
+ return;
+ }
+ }
while (rc == B_FALSE && retry <
iscsi_configroot_retry) {
rc = iscsid_login_tgt(ihp, name,
@@ -787,6 +797,11 @@
" iSCSI boot session...");
iscsi_configroot_printed = B_TRUE;
}
+ if (iscsi_net_up == 0) {
+ if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS) {
+ iscsi_net_up = 1;
+ }
+ }
while (rc == B_FALSE && retry <
iscsi_configroot_retry) {
rc = iscsid_login_tgt(ihp, NULL,
@@ -1060,6 +1075,13 @@
/* create or find matching connection */
if (!ISCSI_SUCCESS(iscsi_conn_create(addr_tgt, isp, &icp))) {
+ /*
+ * Teardown the session we just created. It can't
+ * have any luns or connections associated with it
+ * so this should always succeed (luckily since what
+ * would we do if it failed?)
+ */
+ (void) iscsi_sess_destroy(isp);
rtn = B_FALSE;
break;
}
--- a/usr/src/uts/common/sys/byteorder.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/sys/byteorder.h Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -181,14 +181,13 @@
#define BE_IN32(xa) htonl(*((uint32_t *)(void *)(xa)))
#endif /* !__i386 && !__amd64 */
-#if !defined(_LP64) && !defined(_LONGLONG_TYPE)
-#if (!defined(__i386) && !defined(__amd64)) /* sparc */
+#if (!defined(__i386) && !defined(__amd64)) || \
+ (!defined(_LP64) && !defined(_LONGLONG_TYPE))
#define BE_IN64(xa) \
(((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa) + 4))
#else /* x86 */
#define BE_IN64(xa) htonll(*((uint64_t *)(void *)(xa)))
-#endif /* (!__i386 && !__amd64) */
-#endif /* !_LP64 && !_LONGLONG_TYPE */
+#endif /* (!__i386 && !__amd64) || (!_LP64 && !_LONGLONG_TYPE) */
#define LE_IN8(xa) \
*((uint8_t *)(xa))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,224 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_H
+#define _ISER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/cmn_err.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sunddi.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+
+#include <sys/idm/idm.h>
+#include <sys/ib/clients/iser/iser_ib.h>
+#include <sys/ib/clients/iser/iser_resource.h>
+#include <sys/ib/clients/iser/iser_cm.h>
+#include <sys/ib/clients/iser/iser_xfer.h>
+
+/*
+ * iser.h
+ * Definitions and macros related to iSER core functionality,
+ * softstate and DDI routines.
+ */
+extern boolean_t iser_logging;
+#define ISER_LOG if (iser_logging) cmn_err
+
+#define ISER_TASKQ_NTHREADS 4
+
+#define ISER_HEADER_LENGTH 28
+
+#define ISER_DELAY_HALF_SECOND 500000 /* for use with drv_usectohz() */
+
+/* iSER Operational Parameters */
+#define ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN 0x200
+#define ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX 0xFFFFFF
+#define ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX 0xFFFFFF
+#define ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_DEFAULT \
+ ISCSI_DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH
+#define ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN 0x200
+#define ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX 0xFFFFFF
+#define ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX 0xFFFFFF
+#define ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_DEFAULT \
+ ISCSI_DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH
+#define ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN 0x0
+#define ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX 0xFFFFFFFF
+#define ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX 0xFFFFFFFF
+#define ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_DEFAULT 0x0
+
+/* iSCSI key names that iSER is interested in */
+#define ISER_KV_KEY_NAME_RDMA_EXTENSIONS "RDMAExtensions"
+#define ISER_KV_KEY_NAME_OF_MARKER "OFMarker"
+#define ISER_KV_KEY_NAME_IF_MARKER "IFMarker"
+#define ISER_KV_KEY_NAME_TGT_RECV_SEGLEN "TargetRecvDataSegmentLength"
+#define ISER_KV_KEY_NAME_INI_RECV_SEGLEN "InitiatorRecvDataSegmentLength"
+#define ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU "MaxOutstandingUnexpectedPDUs"
+
+typedef struct iser_sbind_s {
+ list_node_t is_list_node;
+ ibt_sbind_hdl_t is_sbindhdl;
+ ib_gid_t is_gid;
+ ib_guid_t is_guid;
+} iser_sbind_t;
+
+/* iSER-specific portion of idm_svc_t */
+typedef struct iser_svc_s {
+ idm_refcnt_t is_refcnt;
+ ib_svc_id_t is_svcid;
+ ibt_srv_hdl_t is_srvhdl;
+ /* list of service bind handles - one per HCA port */
+ list_t is_sbindlist;
+} iser_svc_t;
+
+/*
+ * iSER endpoint connection type
+ */
+typedef enum {
+ ISER_CONN_TYPE_INI = 1,
+ ISER_CONN_TYPE_TGT
+} iser_conn_type_t;
+
+/*
+ * iSER Connection States to keep track of the connection going into
+ * iSER-assisted mode
+ */
+typedef enum {
+ ISER_CONN_STAGE_UNDEFINED,
+ ISER_CONN_STAGE_ALLOCATED, /* conn handle allocated */
+ ISER_CONN_STAGE_IC_CONNECTED, /* conn established */
+ ISER_CONN_STAGE_HELLO_SENT, /* hello exchange stages */
+ ISER_CONN_STAGE_HELLO_SENT_FAIL,
+ ISER_CONN_STAGE_HELLO_WAIT,
+ ISER_CONN_STAGE_HELLO_RCV,
+ ISER_CONN_STAGE_HELLO_RCV_FAIL,
+ ISER_CONN_STAGE_HELLOREPLY_SENT,
+ ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL,
+ ISER_CONN_STAGE_HELLOREPLY_RCV,
+ ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL,
+ ISER_CONN_STAGE_LOGGED_IN,
+ ISER_CONN_STAGE_IC_DISCONNECTED, /* conn disconnected */
+ ISER_CONN_STAGE_IC_FREED, /* conn handle freed */
+ ISER_CONN_STAGE_CLOSING, /* channel closing */
+ ISER_CONN_STAGE_CLOSED /* channel closed */
+} iser_conn_stage_t;
+
+/*
+ * iSER operations parameters negotiated for a given connection
+ */
+typedef struct iser_op_params_s {
+ uint32_t op_header_digest:1,
+ op_data_digest:1,
+ op_rdma_extensions:1,
+ op_ofmarker:1,
+ op_ifmarker:1;
+ uint64_t op_target_recv_data_segment_length;
+ uint64_t op_initiator_recv_data_segment_length;
+ uint64_t op_max_outstanding_unexpected_pdus;
+} iser_op_params_t;
+
+/*
+ * iSER connection information
+ */
+typedef struct iser_conn_s {
+ kmutex_t ic_lock;
+ kcondvar_t ic_stage_cv;
+ iser_conn_type_t ic_type;
+ iser_chan_t *ic_chan;
+ iser_conn_stage_t ic_stage; /* for iSER-assisted mode */
+ iser_op_params_t ic_op_params;
+ idm_conn_t *ic_idmc;
+ idm_svc_t *ic_idms;
+} iser_conn_t;
+
+/*
+ * iser_state_t is the iser driver's state structure, encoding all of
+ * the state information.
+ */
+typedef struct iser_state_s {
+ dev_info_t *is_dip;
+ int is_instance;
+
+ /* IDM open ref counter and lock */
+ kmutex_t is_refcnt_lock;
+ int is_open_refcnt;
+
+ ibt_clnt_hdl_t is_ibhdl; /* IBT handle */
+
+ /* list of HCAs */
+ kmutex_t is_hcalist_lock; /* protects the HCA list below */
+ list_t is_hcalist;
+ uint_t is_num_hcas;
+
+ /* Connection list */
+ iser_conn_t *is_connlist;
+
+ /* Global work request handle cache */
+ kmem_cache_t *iser_wr_cache;
+} iser_state_t;
+
+typedef enum {
+ ISER_STATUS_SUCCESS = 0,
+ ISER_STATUS_FAIL
+} iser_status_t;
+
+int iser_idm_register();
+
+iser_status_t iser_register_service(idm_svc_t *idm_svc);
+
+iser_status_t iser_bind_service(idm_svc_t *idm_svc);
+
+void iser_unbind_service(idm_svc_t *idm_svc);
+
+void iser_deregister_service(idm_svc_t *idm_svc);
+
+boolean_t iser_path_exists(idm_sockaddr_t *laddr, idm_sockaddr_t *raddr);
+
+iser_chan_t *iser_channel_alloc(idm_sockaddr_t *laddr, idm_sockaddr_t *raddr);
+
+iser_status_t iser_channel_open(iser_chan_t *chan);
+
+void iser_channel_close(iser_chan_t *chan);
+
+void iser_channel_free(iser_chan_t *chan);
+
+void iser_internal_conn_destroy(iser_conn_t *ic);
+
+/* IDM refcnt utilities for the iSER tgt svc handle */
+void iser_tgt_svc_hold(iser_svc_t *is);
+void iser_tgt_svc_rele(iser_svc_t *is);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser_cm.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_CM_H
+#define _ISER_CM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+#include <sys/iscsi_protocol.h>
+
+/*
+ * iser_cm.h
+ * Definitions and functions related to the Communications Manager
+ */
+ibt_cm_status_t iser_ib_cm_handler(void *cm_private, ibt_cm_event_t *event,
+ ibt_cm_return_args_t *ret_args, void *ret_priv_data,
+ ibt_priv_data_len_t ret_len_max);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_CM_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser_ib.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,203 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_IB_H
+#define _ISER_IB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/iscsi_protocol.h>
+
+/*
+ * iser_ib.h
+ * Definitions and macros related to iSER InfiniBand transport,
+ * and the use of the Solaris IBTI (InfiniBand Transport Interface).
+ */
+
+struct iser_state_s;
+
+extern struct iser_state_s *iser_state;
+extern ddi_taskq_t *iser_taskq;
+
+/*
+ * iser_hca_s holds all the information about the InfiniBand HCAs in use.
+ */
+typedef struct iser_hca_s {
+ list_node_t hca_node;
+ boolean_t hca_failed;
+ ibt_clnt_hdl_t hca_clnt_hdl;
+ ibt_hca_hdl_t hca_hdl;
+ ibt_hca_attr_t hca_attr;
+ ibt_pd_hdl_t hca_pdhdl;
+ ib_guid_t hca_guid;
+ uint_t hca_num_ports;
+ ibt_hca_portinfo_t *hca_port_info;
+ uint_t hca_port_info_sz;
+
+ /* Per PD (per HCA) message and data buffer caches */
+ struct iser_vmem_mr_pool_s *hca_msg_pool; /* Use iser_msg_cache */
+ kmem_cache_t *iser_msg_cache;
+ struct iser_vmem_mr_pool_s *hca_buf_pool; /* Use iser_buf_cache */
+ kmem_cache_t *iser_buf_cache;
+} iser_hca_t;
+
+/* RQ low water mark percentage */
+#define ISER_IB_RQ_LWM_PCT 80
+
+/* Maximum number of WRs to post on the RQ at a time */
+#define ISER_IB_RQ_POST_MAX 64
+
+/* Maximum number of SCQ WCs to poll at a time */
+#define ISER_IB_SCQ_POLL_MAX 8
+
+/*
+ * iser_qp_t encodes data related to a Queue Pair (QP) in use by
+ * iSER. Each QP consists of two Work Queues (WQs), one Send Queue
+ * (SQ) and one Receive Queue (RQ). Most of the data in the QP
+ * handle relates to monitoring the posted depth of the RQ.
+ *
+ * Note that we are explicitly using slightly less than a power-of-2
+ * number for our queue sizes. The HCA driver will round up for us,
+ * and this affords us some headroom.
+ */
+#ifdef _LP64
+#define ISER_IB_RECVQ_SIZE 400
+#else
+/* Memory is very limited on 32-bit kernels */
+#define ISER_IB_RECVQ_SIZE 100
+#endif
+#define ISER_IB_SENDQ_SIZE 2000
+#define ISER_IB_SGLIST_SIZE 1
+
+#define ISER_IB_DEFAULT_IRD 2
+#define ISER_IB_DEFAULT_ORD 4
+
+typedef struct iser_qp_s {
+ kmutex_t qp_lock;
+ uint_t sq_size;
+ uint_t rq_size;
+ uint32_t rq_depth;
+ uint32_t rq_level;
+ uint32_t rq_min_post_level;
+ uint32_t rq_lwm;
+ boolean_t rq_taskqpending;
+} iser_qp_t;
+
+/*
+ * iSER RC channel information
+ */
+typedef struct iser_chan_s {
+ kmutex_t ic_lock;
+
+ /* IBT channel handle */
+ ibt_channel_hdl_t ic_chanhdl;
+
+ /* local and remote IP addresses and port numbers */
+ ibt_ip_addr_t ic_localip;
+ ibt_ip_addr_t ic_remoteip;
+ in_port_t ic_lport;
+ in_port_t ic_rport;
+
+ /*
+ * The local HCA GUID, the service ID, Destination GID, Source GID,
+ * and the primary HCA port on which the channel is connected are
+ * stored in ic_ibt_path.
+ */
+ ibt_path_info_t ic_ibt_path;
+
+ /*
+ * Information related to the HCA handle and the queues.
+ */
+ iser_hca_t *ic_hca;
+ ibt_cq_hdl_t ic_sendcq;
+ ibt_cq_hdl_t ic_recvcq;
+ uint_t ic_sendcq_sz;
+ uint_t ic_recvcq_sz;
+ iser_qp_t ic_qp;
+
+ /* Used to track the number of WRs posted on the SQ */
+ kmutex_t ic_sq_post_lock;
+ uint_t ic_sq_post_count;
+ uint_t ic_sq_max_post_count;
+
+ /*
+ * To help identify the channel end point and some connection
+ * specifics, maintain a pointer to the connection on which
+ * this channel originated
+ */
+ struct iser_conn_s *ic_conn;
+} iser_chan_t;
+
+int iser_ib_init(void);
+
+int iser_ib_fini(void);
+
+int iser_ib_register_service(idm_svc_t *idm_svc);
+
+int iser_ib_bind_service(idm_svc_t *idm_svc);
+
+void iser_ib_unbind_service(idm_svc_t *idm_svc);
+
+void iser_ib_deregister_service(idm_svc_t *idm_svc);
+
+void iser_ib_conv_sockaddr2ibtaddr(idm_sockaddr_t *saddr,
+ ibt_ip_addr_t *ibt_addr);
+
+void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
+ ibt_ip_addr_t *ibt_addr, in_port_t port);
+
+int iser_ib_get_paths(
+ ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, ibt_path_info_t *path,
+ ibt_path_ip_src_t *path_src_ip);
+
+iser_chan_t *iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip,
+ ibt_ip_addr_t *remote_ip);
+
+int iser_ib_open_rc_channel(iser_chan_t *chan);
+
+void iser_ib_close_rc_channel(iser_chan_t *chan);
+
+void iser_ib_free_rc_channel(iser_chan_t *chan);
+
+void iser_ib_post_recv(void *arg);
+
+void iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg);
+
+void iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg);
+
+void iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl,
+ ibt_async_code_t code, ibt_async_event_t *event);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_IB_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser_idm.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_IDM_H
+#define _ISER_IDM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/idm/idm.h>
+#include <sys/idm/idm_text.h>
+
+/*
+ * iSER transport routines
+ *
+ * All transport functions except iser_tgt_svc_create() are called through
+ * the ops vector; iser_tgt_svc_create() is called from the async handler
+ * in addition to being called by the ULP.
+ */
+
+/*
+ * For small transfers, it is both CPU and time intensive to register the
+ * memory used for the RDMA, so the transport does bcopy into memory that
+ * is already pre-registered and maintained in a cache.
+ */
+#define ISER_BCOPY_THRESHOLD 0x20000 /* 128k */
+
+idm_status_t iser_tgt_svc_create(idm_svc_req_t *sr, struct idm_svc_s *is);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_IDM_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser_resource.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_RESOURCE_H
+#define _ISER_RESOURCE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+#include <sys/iscsi_protocol.h>
+
+#define ISER_CACHE_NAMELEN 31 /* KMEM_CACHE_NAMELEN */
+
+/* Default message lengths */
+#define ISER_MAX_CTRLPDU_LEN 0x4000
+#define ISER_MAX_TEXTPDU_LEN 0x4000
+
+/* Default data buffer length */
+#define ISER_DEFAULT_BUFLEN 0x20000
+
+/*
+ * iser_resource.h
+ * Definitions and functions related to set up buffer allocation from
+ * IBT memory regions and management of work requests.
+ */
+
+struct iser_hca_s;
+
+/*
+ * Memory regions
+ */
+typedef struct iser_mr_s {
+ ibt_mr_hdl_t is_mrhdl;
+ ib_vaddr_t is_mrva;
+ ib_memlen_t is_mrlen;
+ ibt_lkey_t is_mrlkey;
+ ibt_rkey_t is_mrrkey;
+ avl_node_t is_avl_ln;
+} iser_mr_t;
+
+typedef struct iser_vmem_mr_pool_s {
+ iser_hca_t *ivmp_hca;
+ ibt_mr_flags_t ivmp_mr_flags;
+ ib_memlen_t ivmp_chunksize;
+ vmem_t *ivmp_vmem;
+ uint64_t ivmp_total_size;
+ uint64_t ivmp_max_total_size;
+ avl_tree_t ivmp_mr_list;
+ kmutex_t ivmp_mutex;
+} iser_vmem_mr_pool_t;
+
+#define ISER_MR_QUANTSIZE 0x400
+#define ISER_MIN_CHUNKSIZE 0x100000 /* 1MB */
+
+#ifdef _LP64
+#define ISER_BUF_MR_CHUNKSIZE 0x8000000 /* 128MB */
+#define ISER_BUF_POOL_MAX 0x40000000 /* 1GB */
+#else
+/* Memory is very limited on 32-bit kernels */
+#define ISER_BUF_MR_CHUNKSIZE 0x400000 /* 4MB */
+#define ISER_BUF_POOL_MAX 0x4000000 /* 64MB */
+#endif
+#define ISER_BUF_MR_FLAGS (IBT_MR_ENABLE_LOCAL_WRITE | \
+ IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE)
+#ifdef _LP64
+#define ISER_MSG_MR_CHUNKSIZE 0x2000000 /* 32MB */
+#define ISER_MSG_POOL_MAX 0x10000000 /* 256MB */
+#else
+#define ISER_MSG_MR_CHUNKSIZE 0x100000 /* 1MB */
+#define ISER_MSG_POOL_MAX 0x2000000 /* 32MB */
+#endif
+#define ISER_MSG_MR_FLAGS IBT_MR_ENABLE_LOCAL_WRITE
+
+iser_vmem_mr_pool_t *iser_vmem_create(const char *name, iser_hca_t *hca,
+ ib_memlen_t chunksize, uint64_t max_total_size,
+ ibt_mr_flags_t arena_mr_flags);
+void iser_vmem_destroy(iser_vmem_mr_pool_t *vmr_pool);
+void *iser_vmem_alloc(iser_vmem_mr_pool_t *vmr_pool, size_t size);
+void iser_vmem_free(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size);
+idm_status_t iser_vmem_mr(iser_vmem_mr_pool_t *vmr_pool,
+ void *vaddr, size_t size, iser_mr_t *mr);
+
+/*
+ * iSER work request structure encodes an iSER Send Queue work request
+ * context, with pointers to relevant resources related to the work request.
+ * We hold a pointer to either an IDM PDU handle, an iSER message handle
+ * or an IDM buffer handle. These are allocated from a kmem_cache when
+ * we post send WR's, and freed back when the completion is polled.
+ */
+typedef enum {
+ ISER_WR_SEND,
+ ISER_WR_RDMAW,
+ ISER_WR_RDMAR,
+ ISER_WR_UNDEFINED
+} iser_wr_type_t;
+
+typedef struct iser_wr_s {
+ iser_wr_type_t iw_type;
+ struct iser_msg_s *iw_msg;
+ struct idm_buf_s *iw_buf;
+ struct idm_pdu_s *iw_pdu;
+} iser_wr_t;
+
+int iser_wr_cache_constructor(void *mr, void *arg, int flags);
+void iser_wr_cache_destructor(void *mr, void *arg);
+iser_wr_t *iser_wr_get();
+void iser_wr_free(iser_wr_t *iser_wr);
+
+/*
+ * iSER message structure for iSCSI Control PDUs, constructor and
+ * destructor routines, and utility routines for allocating and
+ * freeing message handles.
+ */
+typedef struct iser_msg_s {
+ struct iser_msg_s *nextp; /* for building lists */
+ kmem_cache_t *cache; /* back pointer for cleanup */
+ ibt_wr_ds_t msg_ds; /* SGEs for hdr and text */
+ ibt_mr_hdl_t mrhdl[2]; /* MR handles for each SGE */
+} iser_msg_t;
+
+int iser_msg_cache_constructor(void *mr, void *arg, int flags);
+void iser_msg_cache_destructor(void *mr, void *arg);
+iser_msg_t *iser_msg_get(iser_hca_t *hca, int num, int *ret);
+void iser_msg_free(iser_msg_t *msg);
+
+/*
+ * iSER data buffer structure for iSER RDMA operations, constructor and
+ * destructor routines, and utility routines for allocating and freeing
+ * buffer handles.
+ */
+typedef struct iser_buf_s {
+ kmem_cache_t *cache; /* back pointer for cleanup */
+ void *buf; /* buffer */
+ uint64_t buflen;
+ iser_mr_t *iser_mr; /* MR handle for this buffer */
+ ibt_wr_ds_t buf_ds; /* SGE for this buffer */
+ ibt_send_wr_t buf_wr; /* DEBUG, copy of wr from request */
+ ibt_wc_t buf_wc; /* DEBUG, copy of wc from completion */
+ timespec_t buf_constructed;
+ timespec_t buf_destructed;
+} iser_buf_t;
+
+int iser_buf_cache_constructor(void *mr, void *arg, int flags);
+void iser_buf_cache_destructor(void *mr, void *arg);
+
+void iser_init_hca_caches(struct iser_hca_s *hca);
+void iser_fini_hca_caches(struct iser_hca_s *hca);
+
+/* Routines to register in-place memory passed on an existing idb */
+int iser_reg_rdma_mem(struct iser_hca_s *hca, idm_buf_t *idb);
+void iser_dereg_rdma_mem(struct iser_hca_s *hca, idm_buf_t *idb);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_RESOURCE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ib/clients/iser/iser_xfer.h Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ISER_XFER_H
+#define _ISER_XFER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/ibtl_types.h>
+#include <sys/iscsi_protocol.h>
+
+/*
+ * iser_xfer.h
+ * Definitions and functions related to data transfer across the RC channel
+ * This includes the posting of the Hello Message, the HelloReply Message, the
+ * RC Send Message for the iSCSI Control PDU.
+ */
+
+/*
+ * iser_private_data_s contains parameters relating to the iSER connection and
+ * IB options support status. This data conforms to the 'iSER CM REQ Message
+ * Private Data Format' from the Annex A12 - Support for iSCSI Extensions for
+ * RDMA.
+ */
+#pragma pack(1)
+typedef struct iser_private_data_s {
+ uint8_t ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ];
+#if defined(_BIT_FIELDS_LTOH)
+ uint32_t rsvd1 :30,
+ sie :1,
+ zbvae :1;
+#elif defined(_BIT_FIELDS_HTOL)
+ uint32_t zbvae :1,
+ sie :1,
+ rsvd1 :30;
+#else
+#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
+#endif /* _BIT_FIELDS_LTOH */
+ uint8_t rsvd2[52];
+} iser_private_data_t;
+
+/* iSER Message Opcodes */
+#define ISER_OPCODE_CTRL_TYPE_PDU 1
+#define ISER_OPCODE_HELLO_MSG 2
+#define ISER_OPCODE_HELLOREPLY_MSG 3
+
+/*
+ * When ZBVA is not supported, both the initiator and the target shall use the
+ * expanded iSER header as defined in the IB Spec Table 540 for iSCSI control-
+ * type PDUs in the connection
+ */
+typedef struct iser_ctrl_hdr_s {
+#if defined(_BIT_FIELDS_LTOH)
+ uint8_t rsvd1: 2,
+ rsv_flag: 1, /* RStag valid bit */
+ wsv_flag: 1, /* WStag valid bit */
+ opcode: 4; /* iSER opcode */
+ uint8_t rsvd[3];
+#elif defined(_BIT_FIELDS_HTOL)
+ uint8_t opcode: 4,
+ wsv_flag: 1,
+ rsv_flag: 1,
+ rsvd1: 2;
+ uint8_t rsvd[3];
+#else
+#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
+#endif /* _BIT_FIELDS_LTOH */
+ uint32_t wstag; /* IB R-key for SCSI Write */
+ uint64_t wva; /* IB VA for SCSI Write */
+ uint32_t rstag; /* IB R-key for SCSI Read */
+ uint64_t rva; /* IB VA for SCSI Read */
+} iser_ctrl_hdr_t;
+
+/* iSER Header Format for the iSER Hello Message */
+typedef struct iser_hello_hdr_s {
+#if defined(_BIT_FIELDS_LTOH)
+ uint8_t rsvd1 : 4,
+ opcode : 4;
+ uint8_t minver : 4,
+ maxver : 4;
+#elif defined(_BIT_FIELDS_HTOL)
+ uint8_t opcode : 4,
+ rsvd1 : 4;
+ uint8_t maxver : 4,
+ minver : 4;
+#else
+#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
+#endif /* _BIT_FIELDS_LTOH */
+ uint16_t iser_ird;
+ uint32_t rsvd2[2];
+} iser_hello_hdr_t;
+
+/* iSER Header Format for the iSER HelloReply Message */
+typedef struct iser_helloreply_hdr_s {
+#if defined(_BIT_FIELDS_LTOH)
+ uint8_t flag : 1,
+ rsvd1 : 3,
+ opcode : 4;
+ uint8_t curver : 4,
+ maxver : 4;
+#elif defined(_BIT_FIELDS_HTOL)
+ uint8_t opcode : 4,
+ rsvd1 : 3,
+ flag : 1;
+ uint8_t maxver : 4,
+ curver : 4;
+#else
+#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
+#endif /* _BIT_FIELDS_LTOH */
+ uint16_t iser_ord;
+ uint32_t rsvd2[2];
+} iser_helloreply_hdr_t;
+#pragma pack()
+
+struct iser_state_s;
+
+int iser_xfer_hello_msg(iser_chan_t *chan);
+
+int iser_xfer_helloreply_msg(iser_chan_t *chan);
+
+int iser_xfer_ctrlpdu(iser_chan_t *chan, idm_pdu_t *pdu);
+
+int iser_xfer_buf_to_ini(idm_task_t *idt, idm_buf_t *buf);
+
+int iser_xfer_buf_from_ini(idm_task_t *idt, idm_buf_t *buf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ISER_XFER_H */
--- a/usr/src/uts/common/sys/idm/idm.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/sys/idm/idm.h Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,19 +41,38 @@
IDM_STATUS_SUSPENDED,
IDM_STATUS_HEADER_DIGEST,
IDM_STATUS_DATA_DIGEST,
- IDM_STATUS_PROTOCOL_ERROR
+ IDM_STATUS_PROTOCOL_ERROR,
+ IDM_STATUS_LOGIN_FAIL
} idm_status_t;
+
typedef enum {
CN_CONNECT_ACCEPT = 1, /* Target only */
- CN_LOGIN_FAIL, /* Target only */
+ CN_LOGIN_FAIL,
CN_READY_FOR_LOGIN, /* Initiator only */
CN_FFP_ENABLED,
CN_FFP_DISABLED,
CN_CONNECT_LOST,
- CN_CONNECT_DESTROY
+ CN_CONNECT_DESTROY,
+ CN_CONNECT_FAIL,
+ CN_MAX
} idm_client_notify_t;
+#ifdef IDM_CN_NOTIFY_STRINGS
+static const char *idm_cn_strings[CN_MAX + 1] = {
+ "CN_UNDEFINED",
+ "CN_CONNECT_ACCEPT",
+ "CN_LOGIN_FAIL",
+ "CN_READY_FOR_LOGIN",
+ "CN_FFP_ENABLED",
+ "CN_FFP_DISABLED",
+ "CN_CONNECT_LOST",
+ "CN_CONNECT_DESTROY",
+ "CN_CONNECT_FAIL",
+ "CN_MAX"
+};
+#endif
+
typedef enum {
FD_CONN_FAIL,
FD_CONN_LOGOUT,
@@ -211,7 +230,11 @@
SAS_IDM_TASK,
SAS_ISCSIT_TGT,
SAS_ISCSIT_SESS,
- SAS_ISCSIT_LOGIN
+ SAS_ISCSIT_LOGIN,
+ SAS_ISCSI_CMD,
+ SAS_ISCSI_SESS,
+ SAS_ISCSI_CONN,
+ SAS_ISCSI_LOGIN
} sm_audit_sm_type_t;
typedef struct {
@@ -269,6 +292,9 @@
idm_ini_conn_disconnect(idm_conn_t *ic);
void
+idm_ini_conn_disconnect_sync(idm_conn_t *ic);
+
+void
idm_ini_conn_destroy(idm_conn_t *ic);
/*
@@ -337,7 +363,7 @@
idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
-idm_status_t
+void
idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl);
/*
@@ -365,6 +391,24 @@
idm_buf_t *
idm_buf_find(void *lbuf, size_t data_offset);
+void
+idm_bufpat_set(idm_buf_t *idb);
+
+boolean_t
+idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type);
+
+extern boolean_t idm_pattern_checking;
+
+#define IDM_BUFPAT_SET(CHK_BUF) \
+ if (idm_pattern_checking && (CHK_BUF)->idb_bufalloc) { \
+ idm_bufpat_set(CHK_BUF); \
+ }
+
+#define IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) \
+ if (idm_pattern_checking) { \
+ (void) idm_bufpat_check(CHK_BUF, CHK_LEN, CHK_TYPE); \
+ }
+
/*
* Task services
*/
@@ -389,6 +433,9 @@
idm_task_t *
idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt);
+idm_task_t *
+idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt);
+
void *
idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle);
@@ -405,6 +452,9 @@
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen);
+idm_pdu_t *
+idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen);
+
void
idm_pdu_free(idm_pdu_t *pdu);
--- a/usr/src/uts/common/sys/idm/idm_conn_sm.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/sys/idm/idm_conn_sm.h Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IDM_CONN_SM_H_
@@ -224,7 +224,7 @@
typedef enum {
CA_TX_PROTOCOL_ERROR, /* Send "protocol error" to state machine */
CA_RX_PROTOCOL_ERROR, /* Send "protocol error" to state machine */
- CA_FORWARD, /* State machine event and foward to client */
+ CA_FORWARD, /* State machine event and forward to client */
CA_DROP /* Drop PDU */
} idm_pdu_event_action_t;
@@ -233,6 +233,7 @@
idm_conn_event_t iec_event;
uintptr_t iec_info;
idm_pdu_event_type_t iec_pdu_event_type;
+ boolean_t iec_pdu_forwarded;
} idm_conn_event_ctx_t;
idm_status_t
@@ -248,6 +249,13 @@
void
idm_conn_event(struct idm_conn_s *ic, idm_conn_event_t event, uintptr_t data);
+void
+idm_conn_event(struct idm_conn_s *ic, idm_conn_event_t event, uintptr_t data);
+
+void
+idm_conn_event_locked(struct idm_conn_s *ic, idm_conn_event_t event,
+ uintptr_t event_info, idm_pdu_event_type_t pdu_event_type);
+
idm_status_t
idm_conn_reinstate_event(struct idm_conn_s *old_ic, struct idm_conn_s *new_ic);
--- a/usr/src/uts/common/sys/idm/idm_impl.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/sys/idm/idm_impl.h Tue Mar 24 17:50:49 2009 -0600
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IDM_IMPL_H_
@@ -223,6 +223,8 @@
int idt_tx_to_ini_done;
int idt_rx_from_ini_start;
int idt_rx_from_ini_done;
+ int idt_tx_bytes; /* IDM_CONN_USE_SCOREBOARD */
+ int idt_rx_bytes; /* IDM_CONN_USE_SCOREBOARD */
uint32_t idt_exp_datasn; /* expected datasn */
uint32_t idt_exp_rttsn; /* expected rttsn */
@@ -267,15 +269,36 @@
size_t idb_xfer_len; /* Current requested xfer len */
void *idb_buf_private; /* transport-specific buf handle */
void *idb_reg_private; /* transport-specific reg handle */
+ void *idb_bufptr; /* transport-specific bcopy pointer */
+ boolean_t idb_bufbcopy; /* true if bcopy required */
+
idm_buf_cb_t *idb_buf_cb; /* Data Completion Notify, tgt only */
void *idb_cb_arg; /* Client private data */
idm_task_t *idb_task_binding;
+ timespec_t idb_xfer_start;
+ timespec_t idb_xfer_done;
boolean_t idb_in_transport;
boolean_t idb_tx_thread; /* Sockets only */
iscsi_hdr_t idb_data_hdr_tmpl; /* Sockets only */
idm_status_t idb_status;
} idm_buf_t;
+typedef enum {
+ BP_CHECK_QUICK,
+ BP_CHECK_THOROUGH,
+ BP_CHECK_ASSERT
+} idm_bufpat_check_type_t;
+
+#define BUFPAT_MATCH(bc_bufpat, bc_idb) \
+ (((bc_bufpat)->bufpat_idb == (bc_idb)) && \
+ ((bc_bufpat)->bufpat_bufmagic == IDM_BUF_MAGIC))
+
+typedef struct idm_bufpat_s {
+ void *bufpat_idb;
+ uint32_t bufpat_bufmagic;
+ uint32_t bufpat_offset;
+} idm_bufpat_t;
+
#define PDU_MAX_IOVLEN 12
#define IDM_PDU_MAGIC 0x49504455 /* "IPDU" */
--- a/usr/src/uts/common/sys/idm/idm_transport.h Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/common/sys/idm/idm_transport.h Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -106,7 +106,7 @@
nvlist_t *negotiated_nvl);
/* Activate the negotiated key value pairs */
-typedef idm_status_t (transport_notice_key_values_op_t)(struct idm_conn_s *ic,
+typedef void (transport_notice_key_values_op_t)(struct idm_conn_s *ic,
nvlist_t *negotiated_nvl);
/* Transport capability probe */
@@ -214,6 +214,9 @@
void
idm_transport_setup(ldi_ident_t li);
+void
+idm_transport_teardown(void);
+
#ifdef __cplusplus
}
#endif
--- a/usr/src/uts/intel/Makefile.intel.shared Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/intel/Makefile.intel.shared Tue Mar 24 17:50:49 2009 -0600
@@ -464,7 +464,7 @@
#
# InfiniBand pseudo drivers
#
-DRV_KMODS += ib ibd rdsib sdp
+DRV_KMODS += ib ibd rdsib sdp iser
#
# LVM modules
--- a/usr/src/uts/intel/iscsi/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/intel/iscsi/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -62,6 +62,7 @@
# Note dependancy on misc/scsi.
#
LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -Nmisc/md5 -Nmisc/ksocket
+LDFLAGS += -N"misc/idm"
LINTFLAGS += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
LINTFLAGS64 += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/iser/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,77 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# usr/src/uts/intel/iser/Makefile
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = iser
+OBJECTS = $(ISER_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(ISER_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibcm -Nmisc/idm
+CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/iser
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/sparc/Makefile.sparc.shared Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/sparc/Makefile.sparc.shared Tue Mar 24 17:50:49 2009 -0600
@@ -273,7 +273,7 @@
DRV_KMODS += usbvc
DRV_KMODS += hci1394 av1394 scsa1394 dcam1394
DRV_KMODS += sbp2
-DRV_KMODS += ib ibd rdsib sdp
+DRV_KMODS += ib ibd rdsib sdp iser
DRV_KMODS += pci_pci px_pci pxb_plx pxb_bcm pcie
DRV_KMODS += i8042 kb8042 mouse8042
DRV_KMODS += fcode
--- a/usr/src/uts/sparc/iscsi/Makefile Tue Mar 24 19:19:49 2009 -0400
+++ b/usr/src/uts/sparc/iscsi/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -62,6 +62,7 @@
# Note dependancy on misc/scsi.
#
LDFLAGS += -dy -N"misc/scsi" -N"fs/sockfs" -N"sys/doorfs" -N"misc/md5" -Nmisc/ksocket
+LDFLAGS += -N"misc/idm"
LINTFLAGS += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
LINTFLAGS64 += -a -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_PTRDIFF_OVERFLOW
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/iser/Makefile Tue Mar 24 17:50:49 2009 -0600
@@ -0,0 +1,77 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# usr/src/uts/sparc/iser/Makefile
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = iser
+OBJECTS = $(ISER_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(ISER_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibcm -Nmisc/idm
+CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/iser
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ