PSARC 2010/085 IPoIB Administration Enhancement
6837574 IPoIB Administration Enhancement - PSARC 2010/085
6864899 IB datalink names need to be consistent between cluster controllers
6855737 cfgadm status for ibd attachment points gets to inconsistent state
6883212 ibd: add Brussels framework support
6927048 IBD driver should be hardened to handle late HCA port initialization issue
6827237 Fix warlock errors in ibnex
--- a/usr/src/cmd/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -195,6 +195,7 @@
hotplug \
hotplugd \
hwdata \
+ ibd_upgrade \
id \
idmap \
infocmp \
--- a/usr/src/cmd/Makefile.check Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/Makefile.check Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
include ../Makefile.master
@@ -43,6 +42,7 @@
fcinfo \
fcoesvc \
fm \
+ ibd_upgrade \
intrd \
iscsid \
iscsitsvc \
--- a/usr/src/cmd/datadm/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/datadm/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-#ident "%Z%%M% %I% %E% SMI"
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
#
# cmd/datadm/Makefile
#
@@ -38,7 +34,7 @@
SRCS = $(OBJS:%.o=%.c)
CFLAGS += $(CCVERBOSE)
-LDLIBS += -ldevinfo -lsocket
+LDLIBS += -ldlpi -ldladm -lsocket
.KEEP_STATE:
--- a/usr/src/cmd/datadm/datadm.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/datadm/datadm.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,14 +19,14 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
@@ -37,7 +37,12 @@
#include <errno.h>
#include <libintl.h>
#include <locale.h>
-#include <libdevinfo.h>
+#include <fcntl.h>
+#include <libdlpi.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
#define DATADM_OP_VIEW 0x0000
#define DATADM_OP_UPDATE 0x0001
@@ -48,7 +53,6 @@
#define DATADM_LINESZ 1024
#define DATADM_NUM_SP_TOKENS 7
#define DATADM_NUM_DAT_TOKENS 8
-#define DATADM_IA_NAME "ibd"
#define DATADM_DRV_NAME "driver_name"
#define DATADM_MAX_TOKENS 16
@@ -106,7 +110,7 @@
* are added when sp entry processing occurs. duplicate
* sp entries are not added to this list. the ia_list may
* be built statically using the information in dat.conf or
- * dynamically using libdevinfo. similar to the sp_list,
+ * dynamically. similar to the sp_list,
* the ia_list contains only unique entries.
*/
typedef struct datadm_hca_entry {
@@ -119,12 +123,12 @@
/*
* an ia_entry is created when a new ia name is encountered
* during sp_entry processing or when a new ia name is
- * discovered by datadm_fill_ia_list. ia_entry holds the ia
+ * discovered by datadm_build_ia_lists. ia_entry holds the ia
- * device's instance number.
+ * device's datalink name.
*/
typedef struct datadm_ia_entry {
datadm_entry_t iae_header;
- int iae_devnum;
+ char iae_name[MAXLINKNAMELEN];
} datadm_ia_entry_t;
/*
@@ -138,6 +142,11 @@
char *cmnt_line;
} datadm_cmnt_entry_t;
+/*
+ * 2nd argument to datadm_hca_entry_find_by_name.
+ * hf_hca_entry is filled in if an hca_entry whose
+ * he_name matches hf_name is found.
+ */
+typedef struct datadm_hca_find_by_name {
+ char *hf_name;
+ datadm_hca_entry_t *hf_hca_entry;
+} datadm_hca_find_by_name_t;
+
/*
* 2nd argument to datadm_hca_entry_find.
* hf_hca_entry is filled in if an hca_entry with
@@ -151,20 +160,20 @@
/*
* 2nd argument to datadm_ia_entry_find.
* if_ia_entry is filled in if an ia_entry with
- * a matching ia_devnum is found.
+ * a matching ia_name is found.
*/
typedef struct datadm_ia_find {
- int if_ia_devnum;
+ char *if_ia_name;
datadm_ia_entry_t *if_ia_entry;
} datadm_ia_find_t;
/*
- * this gets passed to datadm_fill_ia_list.
- * we do this to avoid regenerating the device
- * tree for each hca_entry we process.
+ * this gets passed to datadm_add_plink.
*/
typedef struct datadm_fill_ia_list {
- di_node_t ia_root_node;
+ datadm_list_t *ia_hca_list;
+ dladm_handle_t ia_dlh;
+ int ia_ibnex_fd;
int ia_sock_fd_v4;
int ia_sock_fd_v6;
} datadm_fill_ia_list_t;
@@ -183,10 +192,8 @@
static datadm_list_t datadm_conf_header;
static char *datadm_conf_header_default =
"#\n"
- "# Copyright 2004 Sun Microsystems, Inc. All rights reserved.\n"
- "# Use is subject to license terms.\n"
- "#\n"
- "# ident \"@(#)dat.conf 1.1 03/08/26 SMI\"\n"
+ "# Copyright (c) 2003, 2010, Oracle and/or its affiliates. "
+ "All rights reserved.\n"
"#\n"
"# DAT configuration file.\n"
"#\n"
@@ -215,7 +222,7 @@
static int datadm_parse_libpath(char *, datadm_sp_entry_t *);
static int datadm_parse_sp_version(char *, datadm_sp_entry_t *);
static int datadm_parse_sp_data(char *, datadm_sp_entry_t *);
-static int datadm_parse_ia_name(char *, int *);
+static int datadm_parse_ia_name(char *, char *);
/*
* utility functions
@@ -245,13 +252,13 @@
*/
static int datadm_parse_sp_conf(datadm_list_t *);
static int datadm_parse_dat_conf(datadm_list_t *);
-static int datadm_process_sp_entry(datadm_list_t *, datadm_sp_entry_t *, int);
+static int datadm_process_sp_entry(datadm_list_t *, datadm_sp_entry_t *,
+ char *);
/*
* ia devices discovery
*/
static int datadm_build_ia_lists(datadm_list_t *);
-static int datadm_fill_ia_list(datadm_hca_entry_t *, datadm_fill_ia_list_t *);
/*
* helper function for OP_REMOVE
@@ -429,27 +436,11 @@
* parses the ia_name field in dat.conf
*/
static int
-datadm_parse_ia_name(char *str, int *ia_devnum)
+datadm_parse_ia_name(char *str, char *ia_name)
{
- int len;
- int i, start;
-
- len = strlen(DATADM_IA_NAME);
- if (strncmp(str, DATADM_IA_NAME, len) != 0) {
+ if (strlen(str) >= MAXLINKNAMELEN)
return (-1);
- }
- start = i = len;
- len = strlen(str);
- if (str[i] == '\0') {
- return (-1);
- }
- for (; i < len; i++) {
- if (!isdigit(str[i])) break;
- }
- if (i != len) {
- return (-1);
- }
- *ia_devnum = atoi(str + start);
+ (void) strlcpy(ia_name, str, MAXLINKNAMELEN);
return (0);
}
@@ -803,7 +794,7 @@
static int
datadm_ia_entry_find(datadm_ia_entry_t *i1, datadm_ia_find_t *iaf)
{
- if (i1->iae_devnum == iaf->if_ia_devnum) {
+ if (strcmp(i1->iae_name, iaf->if_ia_name) == 0) {
iaf->if_ia_entry = i1;
return (1);
}
@@ -930,7 +921,7 @@
*/
static int
datadm_process_sp_entry(datadm_list_t *hca_list, datadm_sp_entry_t *sp_entry,
- int ia_devnum)
+ char *ia_name)
{
datadm_hca_find_t hca_find;
datadm_ia_find_t ia_find;
@@ -963,10 +954,10 @@
} else {
hca_entry = hca_find.hf_hca_entry;
}
- if (ia_devnum == -1) {
+ if (ia_name == NULL) {
goto put_sp_entry;
}
- ia_find.if_ia_devnum = ia_devnum;
+ ia_find.if_ia_name = ia_name;
ia_find.if_ia_entry = NULL;
(void) datadm_walk_list(&hca_entry->he_ia_list,
(int (*)(datadm_entry_t *, void *))datadm_ia_entry_find, &ia_find);
@@ -982,7 +973,7 @@
if (ia_entry == NULL) {
return (-1);
}
- ia_entry->iae_devnum = ia_devnum;
+ (void) strlcpy(ia_entry->iae_name, ia_name, MAXLINKNAMELEN);
datadm_enqueue_entry(&hca_entry->he_ia_list,
(datadm_entry_t *)ia_entry);
}
@@ -1069,7 +1060,7 @@
}
retval = datadm_process_sp_entry(hca_list,
- sp_entry, -1);
+ sp_entry, NULL);
if (retval != 0) {
datadm_free_sp_entry(sp_entry);
if (retval == 1) {
@@ -1168,7 +1159,7 @@
}
if (token_count == DATADM_NUM_DAT_TOKENS) {
int i = 0;
- int ia_devnum = -1;
+ char ia_name[MAXLINKNAMELEN];
/*
* we stop saving comment lines once
@@ -1195,7 +1186,7 @@
* does not belong to an
* sp_entry
*/
- arg = (void *)&ia_devnum;
+ arg = (void *)ia_name;
} else {
arg = (void *)sp_entry;
}
@@ -1217,10 +1208,12 @@
* doing update
*/
if (datadm_args.da_op_type == DATADM_OP_UPDATE) {
- ia_devnum = -1;
+ retval = datadm_process_sp_entry(hca_list,
+ sp_entry, NULL);
+ } else {
+ retval = datadm_process_sp_entry(hca_list,
+ sp_entry, ia_name);
}
- retval = datadm_process_sp_entry(hca_list, sp_entry,
- ia_devnum);
if (retval != 0) {
datadm_free_sp_entry(sp_entry);
if (retval == 1) {
@@ -1249,96 +1242,6 @@
}
/*
- * discovers all ibd devices under a particular hca
- */
-static int
-datadm_fill_ia_list(datadm_hca_entry_t *hca, datadm_fill_ia_list_t *args)
-{
- di_node_t root_node;
- di_node_t hca_node;
- int retval = 0;
- int sv4, sv6;
-
- root_node = args->ia_root_node;
- sv4 = args->ia_sock_fd_v4;
- sv6 = args->ia_sock_fd_v6;
-
- hca_node = di_drv_first_node(hca->he_name, root_node);
- if (hca_node == DI_NODE_NIL) {
- return (0);
- }
- while (hca_node != DI_NODE_NIL) {
- di_node_t ibd_node;
-
- ibd_node = di_drv_first_node(DATADM_IA_NAME, hca_node);
- while (ibd_node != DI_NODE_NIL) {
- datadm_ia_find_t ia_find;
- datadm_ia_entry_t *ia_entry;
- struct lifreq req;
- int devnum, rval;
-
- if (hca_node != di_parent_node(ibd_node)) {
- ibd_node = di_drv_next_node(ibd_node);
- continue;
- }
- devnum = di_instance(ibd_node);
- if (devnum == -1) {
- ibd_node = di_drv_next_node(ibd_node);
- continue;
- }
-
- (void) snprintf(req.lifr_name, sizeof (req.lifr_name),
- "%s%d", DATADM_IA_NAME, devnum);
- /*
- * we don't really need to know the ip address.
- * we just want to check if the device is plumbed
- * or not.
- */
- rval = ioctl(sv4, SIOCGLIFADDR, (caddr_t)&req);
- if (rval != 0) {
- /*
- * we try v6 if the v4 address isn't found.
- */
- rval = ioctl(sv6, SIOCGLIFADDR, (caddr_t)&req);
- if (rval != 0) {
- ibd_node = di_drv_next_node(ibd_node);
- continue;
- }
- }
- ia_find.if_ia_devnum = devnum;
- ia_find.if_ia_entry = NULL;
- (void) datadm_walk_list(&hca->he_ia_list,
- (int (*)(datadm_entry_t *, void *))
- datadm_ia_entry_find, &ia_find);
-
- if (ia_find.if_ia_entry == NULL) {
- /*
- * we insert an ia entry only if
- * it is unique.
- */
- ia_entry = datadm_alloc_ia_entry();
- if (ia_entry == NULL) {
- retval = -1;
- break;
- }
- ia_entry->iae_devnum = devnum;
- datadm_enqueue_entry(&hca->he_ia_list,
- (datadm_entry_t *)ia_entry);
- } else {
- ia_entry = ia_find.if_ia_entry;
- }
- ibd_node = di_drv_next_node(ibd_node);
- }
- hca_node = di_drv_next_node(hca_node);
- }
- if (retval != 0) {
- datadm_free_list(&hca->he_ia_list,
- (void (*)(datadm_entry_t *))datadm_free_ia_entry);
- }
- return (0);
-}
-
-/*
* used by OP_REMOVE to invalidate common sp entries between hl1 and hl2.
* invalid sp entries will be ignored by datadm_generate_dat_conf.
*/
@@ -1382,45 +1285,146 @@
}
}
+/*
+ * list-walk callback: records h1 in hf->hf_hca_entry and returns 1
+ * when h1's he_name matches hf->hf_name, returns 0 otherwise.
+ */
+static int
+datadm_hca_entry_find_by_name(datadm_hca_entry_t *h1,
+ datadm_hca_find_by_name_t *hf)
+{
+ if (!datadm_str_match(h1->he_name, hf->hf_name))
+ return (0);
+
+ hf->hf_hca_entry = h1;
+ return (1);
+}
+
+/*
+ * walks hca_list looking for the hca_entry whose he_name matches
+ * hca_driver_name. returns the matching entry, or NULL if the walk
+ * finds none.
+ *
+ * file-local helper, so it is declared static like the rest of the
+ * helpers in this file.
+ */
+static datadm_hca_entry_t *
+datadm_hca_lookup_by_name(datadm_list_t *hca_list, char *hca_driver_name)
+{
+ datadm_hca_find_by_name_t hf;
+
+ hf.hf_name = hca_driver_name;
+ hf.hf_hca_entry = NULL;
+ (void) datadm_walk_list(hca_list,
+ (int (*)(datadm_entry_t *, void *))datadm_hca_entry_find_by_name,
+ &hf);
+ return (hf.hf_hca_entry);
+}
+
+/*
+ * callback handed to dlpi_walk() by datadm_build_ia_lists. for a link
+ * that is an IB partition and is plumbed (v4 or v6), looks up the hca
+ * entry of the HCA driver behind it and adds the link name to that
+ * entry's ia list, unless the name is already there.
+ *
+ * always returns B_FALSE, whether or not an entry was added
+ * (presumably so that the walk visits every link -- see dlpi_walk).
+ */
+static boolean_t
+datadm_add_plink(char *linkname, datadm_fill_ia_list_t *ia_args)
+{
+ datalink_class_t class;
+ datalink_id_t linkid;
+ dladm_ib_attr_t ib_attr;
+ ibnex_ctl_query_hca_t query_hca;
+ datadm_hca_entry_t *hca;
+ struct lifreq req;
+ datadm_ia_find_t ia_find;
+ datadm_ia_entry_t *ia_entry;
+
+ /*
+ * only IB partition links whose attributes can be
+ * retrieved are of interest.
+ */
+ if (dladm_name2info(ia_args->ia_dlh, linkname, &linkid, NULL, &class,
+ NULL) != DLADM_STATUS_OK)
+ return (B_FALSE);
+ if (class != DATALINK_CLASS_PART)
+ return (B_FALSE);
+ if (dladm_part_info(ia_args->ia_dlh, linkid, &ib_attr,
+ DLADM_OPT_ACTIVE) != DLADM_STATUS_OK)
+ return (B_FALSE);
+
+ /*
+ * we don't really need to know the ip address.
+ * we just want to check if the device is plumbed
+ * or not. v6 is tried only if the v4 address isn't found.
+ */
+ (void) strlcpy(req.lifr_name, linkname, sizeof (req.lifr_name));
+ if (ioctl(ia_args->ia_sock_fd_v4, SIOCGLIFADDR, (caddr_t)&req) != 0 &&
+ ioctl(ia_args->ia_sock_fd_v6, SIOCGLIFADDR, (caddr_t)&req) != 0)
+ return (B_FALSE);
+
+ /* ask ibnex which HCA driver owns the partition's HCA guid */
+ bzero(&query_hca, sizeof (query_hca));
+ query_hca.hca_guid = ib_attr.dia_hca_guid;
+ if (ioctl(ia_args->ia_ibnex_fd, IBNEX_CTL_QUERY_HCA, &query_hca) == -1)
+ return (B_FALSE);
+
+ hca = datadm_hca_lookup_by_name(ia_args->ia_hca_list,
+ query_hca.hca_info.hca_driver_name);
+ if (hca == NULL)
+ return (B_FALSE);
+
+ ia_find.if_ia_name = linkname;
+ ia_find.if_ia_entry = NULL;
+ (void) datadm_walk_list(&hca->he_ia_list,
+ (int (*)(datadm_entry_t *, void *))
+ datadm_ia_entry_find, &ia_find);
+
+ /*
+ * we insert an ia entry only if
+ * it is unique.
+ */
+ if (ia_find.if_ia_entry != NULL)
+ return (B_FALSE);
+
+ ia_entry = datadm_alloc_ia_entry();
+ if (ia_entry == NULL)
+ return (B_FALSE);
+
+ (void) strlcpy(ia_entry->iae_name, linkname, MAXLINKNAMELEN);
+ datadm_enqueue_entry(&hca->he_ia_list, (datadm_entry_t *)ia_entry);
+
+ return (B_FALSE);
+}
+
/*
- * applies datadm_fill_ia_list on each hca_list element
+ * build ia lists for each hca_list element
*/
static int
datadm_build_ia_lists(datadm_list_t *hca_list)
{
+ dladm_handle_t dlh;
datadm_fill_ia_list_t ia_args;
- di_node_t root_node;
- int retval = 0;
- int sv4, sv6;
+ int rv = -1;
+ int fd = -1;
+ int sv4 = -1;
+ int sv6 = -1;
- root_node = di_init("/", DINFOCPYALL);
- if (root_node == DI_NODE_NIL) {
- perror("datadm: di_init");
+ if (dladm_open(&dlh) != DLADM_STATUS_OK)
return (-1);
+
+ if ((fd = open(IBNEX_DEVCTL_DEV, O_RDONLY)) < 0)
+ goto out;
+
+ if ((sv4 = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+ perror("datadm: socket");
+ goto out;
}
- sv4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (sv4 < 0) {
+
+ if ((sv6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
perror("datadm: socket");
- di_fini(root_node);
- return (-1);
+ goto out;
}
- sv6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (sv6 < 0) {
- perror("datadm: socket");
- di_fini(root_node);
- return (-1);
- }
- ia_args.ia_root_node = root_node;
+
+ ia_args.ia_hca_list = hca_list;
+ ia_args.ia_dlh = dlh;
+ ia_args.ia_ibnex_fd = fd;
ia_args.ia_sock_fd_v4 = sv4;
ia_args.ia_sock_fd_v6 = sv6;
- retval = datadm_walk_list(hca_list,
- (int (*)(datadm_entry_t *, void *))datadm_fill_ia_list, &ia_args);
+ dlpi_walk((boolean_t (*) (const char *, void *))datadm_add_plink,
+ &ia_args, 0);
+ rv = 0;
- (void) close(sv4);
- (void) close(sv6);
- di_fini(root_node);
- return (retval);
+out:
+ if (sv4 != -1)
+ (void) close(sv4);
+ if (sv6 != -1)
+ (void) close(sv6);
+ if (fd != -1)
+ (void) close(fd);
+
+ dladm_close(dlh);
+ return (rv);
}
static int
@@ -1430,8 +1434,8 @@
int retval;
retval = fprintf(outfile,
- "%s%d %s%d.%d %s %s %s %s%d.%d \"%s\" \"%s%s%s\"\n",
- DATADM_IA_NAME, ia_entry->iae_devnum,
+ "%s %s%d.%d %s %s %s %s%d.%d \"%s\" \"%s%s%s\"\n",
+ ia_entry->iae_name,
(sp_entry->spe_api_version.dv_name ?
sp_entry->spe_api_version.dv_name : ""),
sp_entry->spe_api_version.dv_major,
--- a/usr/src/cmd/dladm/dladm.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/dladm/dladm.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <stdio.h>
@@ -55,6 +54,7 @@
#include <libdlwlan.h>
#include <libdlvlan.h>
#include <libdlvnic.h>
+#include <libdlib.h>
#include <libdlether.h>
#include <libdliptun.h>
#include <libdlsim.h>
@@ -66,6 +66,7 @@
#include <libdlvnic.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <sys/ib/ib_types.h>
#include <sys/processor.h>
#include <netinet/in.h>
#include <arpa/inet.h>
@@ -155,6 +156,25 @@
ofmt_handle_t vs_ofmt;
} show_vnic_state_t;
+/*
+ * State passed to show_part()/print_part() by 'dladm show-part'.
+ */
+typedef struct show_part_state {
+ datalink_id_t ps_over_id; /* -l filter, or DATALINK_ALL_LINKID */
+ char ps_part[MAXLINKNAMELEN]; /* name given on the command line */
+ boolean_t ps_parsable;
+ boolean_t ps_found; /* set once a partition passes the filter */
+ dladm_status_t ps_status; /* result of the last print_part() */
+ uint32_t ps_flags; /* DLADM_OPT_ACTIVE or DLADM_OPT_PERSIST */
+ ofmt_handle_t ps_ofmt;
+} show_part_state_t;
+
+/*
+ * State passed to show_ib()/print_ib() by 'dladm show-ib'.
+ */
+typedef struct show_ib_state {
+ datalink_id_t is_link_id; /* link to show, or DATALINK_ALL_LINKID */
+ char is_link[MAXLINKNAMELEN]; /* name given on the command line */
+ boolean_t is_parsable;
+ dladm_status_t is_status; /* result of the last print_ib() */
+ uint32_t is_flags;
+ ofmt_handle_t is_ofmt;
+} show_ib_state_t;
+
typedef struct show_usage_state_s {
boolean_t us_plot;
boolean_t us_parsable;
@@ -189,6 +209,8 @@
static cmdfunc_t do_show_ether;
static cmdfunc_t do_create_vnic, do_delete_vnic, do_show_vnic;
static cmdfunc_t do_up_vnic;
+static cmdfunc_t do_create_part, do_delete_part, do_show_part, do_show_ib;
+static cmdfunc_t do_up_part;
static cmdfunc_t do_create_etherstub, do_delete_etherstub, do_show_etherstub;
static cmdfunc_t do_create_simnet, do_modify_simnet;
static cmdfunc_t do_delete_simnet, do_show_simnet, do_up_simnet;
@@ -200,6 +222,8 @@
static void do_up_vnic_common(int, char **, const char *, boolean_t);
+static int show_part(dladm_handle_t, datalink_id_t, void *);
+
static void altroot_cmd(char *, int, char **);
static int show_linkprop_onelink(dladm_handle_t, datalink_id_t, void *);
@@ -327,6 +351,17 @@
" show-vnic [-pP] [-l <link>] [-s [-i <interval>]] "
"[<link>]\n" },
{ "up-vnic", do_up_vnic, NULL },
+ { "create-part", do_create_part,
+ " create-part [-t] [-f] -l <link> [-P <pkey>]\n"
+ "\t\t [-R <root-dir>] <part-link>" },
+ { "delete-part", do_delete_part,
+ " delete-part [-t] [-R <root-dir>] <part-link>"},
+ { "show-part", do_show_part,
+ " show-part [-pP] [-o <field>,...][-l <linkover>]\n"
+ "\t\t [<part-link>]" },
+ { "show-ib", do_show_ib,
+ " show-ib [-p] [-o <field>,...] [<link>]\n" },
+ { "up-part", do_up_part, NULL },
{ "create-etherstub", do_create_etherstub,
" create-etherstub [-t] <link>" },
{ "delete-etherstub", do_delete_etherstub,
@@ -483,6 +518,25 @@
{ 0, 0, 0, 0 }
};
+/* long options accepted by 'dladm create-part' (see do_create_part) */
+static const struct option part_lopts[] = {
+ {"temporary", no_argument, 0, 't' },
+ {"pkey", required_argument, 0, 'P' },
+ {"link", required_argument, 0, 'l' },
+ {"force", no_argument, 0, 'f' },
+ {"root-dir", required_argument, 0, 'R' },
+ {"prop", required_argument, 0, 'p' },
+ { 0, 0, 0, 0 }
+};
+
+/* long options accepted by 'dladm show-part' (see do_show_part) */
+static const struct option show_part_lopts[] = {
+ {"parsable", no_argument, 0, 'p' },
+ {"parseable", no_argument, 0, 'p' },
+ {"link", required_argument, 0, 'l' },
+ {"persistent", no_argument, 0, 'P' },
+ {"output", required_argument, 0, 'o' },
+ { 0, 0, 0, 0 }
+};
+
static const struct option etherstub_lopts[] = {
{"temporary", no_argument, 0, 't' },
{"root-dir", required_argument, 0, 'R' },
@@ -975,6 +1029,59 @@
;
/*
+ * structures for 'dladm show-ib'
+ */
+typedef struct ib_fields_buf_s
+{
+ char ib_link[DLPI_LINKNAME_MAX];
+ char ib_hcaguid[17];
+ char ib_portguid[17];
+ char ib_portnum[4];
+ /*
+ * Filled in by get_linkstate() from print_ib(). Sized like
+ * part_fields_buf_t.part_state, which receives the output of the
+ * same call; 6 bytes would not hold a 7 character state string.
+ * NOTE(review): confirm against get_linkstate()'s longest string.
+ */
+ char ib_state[8];
+ char ib_pkeys[MAXPKEYSTRSZ];
+} ib_fields_buf_t;
+
+/* output columns of 'dladm show-ib'; each reads one ib_fields_buf_t member */
+static const ofmt_field_t ib_fields[] = {
+{ "LINK", 13,
+ offsetof(ib_fields_buf_t, ib_link), print_default_cb},
+{ "HCAGUID", IBGUIDSTRLEN,
+ offsetof(ib_fields_buf_t, ib_hcaguid), print_default_cb},
+{ "PORTGUID", IBGUIDSTRLEN,
+ offsetof(ib_fields_buf_t, ib_portguid), print_default_cb},
+{ "PORT", IBPORTSTRLEN,
+ offsetof(ib_fields_buf_t, ib_portnum), print_default_cb},
+{ "STATE", 7,
+ offsetof(ib_fields_buf_t, ib_state), print_default_cb},
+{ "PKEYS", 18,
+ offsetof(ib_fields_buf_t, ib_pkeys), print_default_cb},
+{ NULL, 0, 0, NULL}};
+
+/*
+ * structures for 'dladm show-part'
+ */
+typedef struct part_fields_buf_s
+{
+ char part_link[DLPI_LINKNAME_MAX];
+ char part_pkey[5];
+ char part_over[DLPI_LINKNAME_MAX];
+ char part_state[8];
+ /*
+ * print_part() formats this with "%c----": 5 characters plus the
+ * terminating NUL. With only 5 bytes snprintf() drops the last '-'.
+ */
+ char part_flags[6];
+} part_fields_buf_t;
+
+/* output columns of 'dladm show-part'; each reads one part_fields_buf_t member */
+static const ofmt_field_t part_fields[] = {
+{ "LINK", 13,
+ offsetof(part_fields_buf_t, part_link), print_default_cb},
+{ "PKEY", MAXPKEYLEN,
+ offsetof(part_fields_buf_t, part_pkey), print_default_cb},
+{ "OVER", 13,
+ offsetof(part_fields_buf_t, part_over), print_default_cb},
+{ "STATE", 9,
+ offsetof(part_fields_buf_t, part_state), print_default_cb},
+{ "FLAGS", 5,
+ offsetof(part_fields_buf_t, part_flags), print_default_cb},
+{ NULL, 0, 0, NULL}};
+
+/*
* structures for 'dladm show-simnet'
*/
typedef struct simnet_fields_buf_s
@@ -2597,6 +2704,22 @@
(void) strcpy(lbuf->link_over, "?");
break;
}
+
+ case DATALINK_CLASS_PART: {
+ dladm_part_attr_t pinfo;
+
+ if (dladm_part_info(handle, linkid, &pinfo, flags) !=
+ DLADM_STATUS_OK) {
+ (void) strcpy(lbuf->link_over, "?");
+ break;
+ }
+ if (dladm_datalink_id2info(handle, pinfo.dia_physlinkid, NULL,
+ NULL, NULL, lbuf->link_over, sizeof (lbuf->link_over)) !=
+ DLADM_STATUS_OK)
+ (void) strcpy(lbuf->link_over, "?");
+ break;
+ }
+
case DATALINK_CLASS_BRIDGE: {
datalink_id_t *dlp;
uint_t i, nports;
@@ -8951,3 +9074,611 @@
warn(buf);
}
}
+
+/*
+ * Called from the walker dladm_walk_datalink_id() for each IB partition to
+ * display IB partition specific information.
+ *
+ * Returns the dladm_part_info() status if the partition attributes cannot
+ * be read, DLADM_STATUS_BADARG if the partition name cannot be resolved,
+ * and DLADM_STATUS_OK when the partition was printed or was skipped by the
+ * ps_over_id filter.
+ */
+static dladm_status_t
+print_part(show_part_state_t *state, datalink_id_t linkid)
+{
+ dladm_part_attr_t attr;
+ dladm_status_t status;
+ dladm_conf_t conf;
+ char part_over[MAXLINKNAMELEN];
+ char part_name[MAXLINKNAMELEN];
+ part_fields_buf_t pbuf;
+ boolean_t force_in_conf = B_FALSE;
+
+ /*
+ * Get the information about the IB partition from the partition
+ * datalink ID 'linkid'.
+ */
+ if ((status = dladm_part_info(handle, linkid, &attr, state->ps_flags))
+ != DLADM_STATUS_OK)
+ return (status);
+
+ /*
+ * If an IB Phys link name was provided on the command line we have
+ * the Phys link's datalink ID in the ps_over_id field of the state
+ * structure. Proceed only if the IB partition represented by 'linkid'
+ * was created over Phys link denoted by ps_over_id. The
+ * 'dia_physlinkid' field of dladm_part_attr_t represents the IB Phys
+ * link over which the partition was created.
+ */
+ if (state->ps_over_id != DATALINK_ALL_LINKID)
+ if (state->ps_over_id != attr.dia_physlinkid)
+ return (DLADM_STATUS_OK);
+
+ /*
+ * The linkid argument passed to this function is the datalink ID
+ * of the IB Partition. Get the partition's name from this linkid.
+ */
+ if (dladm_datalink_id2info(handle, linkid, NULL, NULL,
+ NULL, part_name, sizeof (part_name)) != DLADM_STATUS_OK)
+ return (DLADM_STATUS_BADARG);
+
+ bzero(part_over, sizeof (part_over));
+
+ /*
+ * The 'dia_physlinkid' field contains the datalink ID of the IB Phys
+ * link over which the partition was created. Use this linkid to get the
+ * linkover field. If the lookup fails, "?" is shown instead.
+ */
+ if (dladm_datalink_id2info(handle, attr.dia_physlinkid, NULL, NULL,
+ NULL, part_over, sizeof (part_over)) != DLADM_STATUS_OK)
+ (void) sprintf(part_over, "?");
+ state->ps_found = B_TRUE;
+
+ /*
+ * Read the FFORCE field from this datalink's persistent configuration
+ * database line to determine if this datalink was created forcibly.
+ * If this datalink is a temporary datalink, then it will not have an
+ * entry in the persistent configuration, so check if force create flag
+ * is set in the partition attributes.
+ *
+ * We need this two level check since persistent partitions brought up
+ * by up-part during boot will have force create flag always set, since
+ * we want up-part to always succeed even if the port is currently down
+ * or P_Key is not yet available in the subnet.
+ */
+ if ((status = dladm_read_conf(handle, linkid, &conf)) ==
+ DLADM_STATUS_OK) {
+ (void) dladm_get_conf_field(handle, conf, FFORCE,
+ &force_in_conf, sizeof (boolean_t));
+ dladm_destroy_conf(handle, conf);
+ } else if (status == DLADM_STATUS_NOTFOUND) {
+ /*
+ * for a temp link the force create flag will determine
+ * whether it was created with force flag.
+ */
+ force_in_conf = ((attr.dia_flags & DLADM_IBPART_FORCE_CREATE)
+ != 0);
+ }
+
+ (void) snprintf(pbuf.part_link, sizeof (pbuf.part_link),
+ "%s", part_name);
+
+ (void) snprintf(pbuf.part_over, sizeof (pbuf.part_over),
+ "%s", part_over);
+
+ (void) snprintf(pbuf.part_pkey, sizeof (pbuf.part_pkey),
+ "%X", attr.dia_pkey);
+
+ (void) get_linkstate(pbuf.part_link, B_TRUE, pbuf.part_state);
+
+ /* 'f' in the first flag position marks a forcibly created partition */
+ (void) snprintf(pbuf.part_flags, sizeof (pbuf.part_flags),
+ "%c----", force_in_conf ? 'f' : '-');
+
+ ofmt_print(state->ps_ofmt, &pbuf);
+
+ return (DLADM_STATUS_OK);
+}
+
+/*
+ * Datalink walker callback for 'show-part': prints one partition via
+ * print_part(), stores the result in ps_status and keeps the walk going.
+ */
+/* ARGSUSED */
+static int
+show_part(dladm_handle_t dh, datalink_id_t linkid, void *arg)
+{
+ show_part_state_t *state = arg;
+
+ state->ps_status = print_part(state, linkid);
+ return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * Show the information about the IB partition objects.
+ *
+ * Handles 'dladm show-part [-pP] [-o <field>,...] [-l <linkover>]
+ * [<part-link>]'. Without <part-link> every IB partition is walked;
+ * with it only that partition is shown and a failure is fatal.
+ */
+static void
+do_show_part(int argc, char *argv[], const char *use)
+{
+ int option;
+ boolean_t l_arg = B_FALSE;
+ uint32_t flags = DLADM_OPT_ACTIVE;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ datalink_id_t over_linkid = DATALINK_ALL_LINKID;
+ char over_link[MAXLINKNAMELEN];
+ show_part_state_t state;
+ dladm_status_t status;
+ boolean_t o_arg = B_FALSE;
+ char *fields_str = NULL;
+ ofmt_handle_t ofmt;
+ ofmt_status_t oferr;
+ uint_t ofmtflags = 0;
+
+ bzero(&state, sizeof (state));
+ opterr = 0;
+ while ((option = getopt_long(argc, argv, ":pPl:o:", show_part_lopts,
+ NULL)) != -1) {
+ switch (option) {
+ case 'p':
+ state.ps_parsable = B_TRUE;
+ break;
+ case 'P':
+ flags = DLADM_OPT_PERSIST;
+ break;
+ case 'l':
+ /*
+ * The name of the IB Phys link. When this
+ * argument is provided we list only the partition
+ * objects created over this IB Phys link. The name
+ * is resolved to a datalink ID further below.
+ */
+ if (strlcpy(over_link, optarg, MAXLINKNAMELEN) >=
+ MAXLINKNAMELEN)
+ die("link name too long");
+
+ l_arg = B_TRUE;
+ break;
+ case 'o':
+ o_arg = B_TRUE;
+ fields_str = optarg;
+ break;
+ default:
+ die_opterr(optopt, option, use);
+ }
+ }
+
+ /*
+ * Get the partition ID (optional last argument).
+ */
+ if (optind == (argc - 1)) {
+ status = dladm_name2info(handle, argv[optind], &linkid, NULL,
+ NULL, NULL);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "invalid partition link name '%s'",
+ argv[optind]);
+ }
+ (void) strlcpy(state.ps_part, argv[optind], MAXLINKNAMELEN);
+ } else if (optind != argc) {
+ usage();
+ }
+
+ if (state.ps_parsable && !o_arg)
+ die("-p requires -o");
+
+ /*
+ * If an IB Phys link name was provided as an argument, then get its
+ * datalink ID.
+ */
+ if (l_arg) {
+ status = dladm_name2info(handle, over_link, &over_linkid, NULL,
+ NULL, NULL);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "invalid link name '%s'", over_link);
+ }
+ }
+
+ state.ps_over_id = over_linkid; /* IB Phys link ID */
+ state.ps_found = B_FALSE;
+ state.ps_flags = flags;
+
+ if (state.ps_parsable)
+ ofmtflags |= OFMT_PARSABLE;
+ oferr = ofmt_open(fields_str, part_fields, ofmtflags, 0, &ofmt);
+ dladm_ofmt_check(oferr, state.ps_parsable, ofmt);
+ state.ps_ofmt = ofmt;
+
+ /*
+ * If a specific IB partition name was not provided as an argument,
+ * walk all the datalinks and display the information for all
+ * IB partitions. If IB Phys link was provided limit it to only
+ * IB partitions created over that IB Phys link.
+ */
+ if (linkid == DATALINK_ALL_LINKID) {
+ (void) dladm_walk_datalink_id(show_part, handle, &state,
+ DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, flags);
+ } else {
+ /* a single named partition: its print status is fatal */
+ (void) show_part(handle, linkid, &state);
+ if (state.ps_status != DLADM_STATUS_OK) {
+ ofmt_close(ofmt);
+ die_dlerr(state.ps_status, "failed to show IB partition"
+ " '%s'", state.ps_part);
+ }
+ }
+ ofmt_close(ofmt);
+}
+
+
+/*
+ * Called from the walker dladm_walk_datalink_id() for each IB Phys link to
+ * display IB specific information for these Phys links.
+ *
+ * 'state' supplies the optional link filter (is_link_id), the flags handed
+ * to dladm_ib_info() (is_flags) and the output handle (is_ofmt);
+ * 'phys_linkid' is the datalink ID of the IB Phys link to print.
+ *
+ * Returns DLADM_STATUS_OK when the link was printed or was skipped by the
+ * is_link_id filter, otherwise the status of the libdladm call that failed.
+ */
+static dladm_status_t
+print_ib(show_ib_state_t *state, datalink_id_t phys_linkid)
+{
+ dladm_ib_attr_t attr;
+ dladm_status_t status;
+ char linkname[MAXLINKNAMELEN];
+ char pkeystr[MAXPKEYLEN];
+ int i;
+ ib_fields_buf_t ibuf;
+
+ bzero(&attr, sizeof (attr));
+
+ /*
+ * Get the attributes of the IB Phys link from active/Persistent config
+ * based on the flag passed.
+ */
+ if ((status = dladm_ib_info(handle, phys_linkid, &attr,
+ state->is_flags)) != DLADM_STATUS_OK)
+ return (status);
+
+ if ((state->is_link_id != DATALINK_ALL_LINKID) && (state->is_link_id
+ != attr.dia_physlinkid)) {
+ dladm_free_ib_info(&attr);
+ return (DLADM_STATUS_OK);
+ }
+
+ /*
+ * Get the data link name for the phys_linkid. If we are doing show-ib
+ * for all IB Phys links, we have only the datalink IDs not the
+ * datalink name.
+ *
+ * Capture the lookup's own status: the previous value of 'status' is
+ * DLADM_STATUS_OK at this point, so returning it would hide the
+ * failure. Release 'attr' like every other exit taken after
+ * dladm_ib_info() has succeeded.
+ */
+ if ((status = dladm_datalink_id2info(handle, phys_linkid, NULL, NULL,
+ NULL, linkname, MAXLINKNAMELEN)) != DLADM_STATUS_OK) {
+ dladm_free_ib_info(&attr);
+ return (status);
+ }
+
+ (void) snprintf(ibuf.ib_link, sizeof (ibuf.ib_link),
+ "%s", linkname);
+
+ (void) snprintf(ibuf.ib_portnum, sizeof (ibuf.ib_portnum),
+ "%d", attr.dia_portnum);
+
+ (void) snprintf(ibuf.ib_hcaguid, sizeof (ibuf.ib_hcaguid),
+ "%llX", attr.dia_hca_guid);
+
+ (void) snprintf(ibuf.ib_portguid, sizeof (ibuf.ib_portguid),
+ "%llX", attr.dia_port_guid);
+
+ (void) get_linkstate(linkname, B_TRUE, ibuf.ib_state);
+
+ /*
+ * Create a comma separated list of pkeys from the pkey table returned
+ * by the IP over IB driver instance.
+ *
+ * The whole output buffer is cleared (its size is unrelated to the
+ * size of the pkey table) so that it is a valid empty string even
+ * when the table has no entries. The separator is chosen by whether
+ * the list already has content rather than by the table index, so an
+ * invalid first entry does not produce a leading comma.
+ */
+ bzero(ibuf.ib_pkeys, sizeof (ibuf.ib_pkeys));
+ for (i = 0; i < attr.dia_port_pkey_tbl_sz; i++) {
+ if (attr.dia_port_pkeys[i] == IB_PKEY_INVALID_FULL ||
+ attr.dia_port_pkeys[i] == IB_PKEY_INVALID_LIMITED)
+ continue;
+
+ (void) snprintf(pkeystr, MAXPKEYLEN, "%s%X",
+ (ibuf.ib_pkeys[0] == '\0') ? "" : ",",
+ attr.dia_port_pkeys[i]);
+ (void) strlcat(ibuf.ib_pkeys, pkeystr, MAXPKEYSTRSZ);
+ }
+
+ dladm_free_ib_info(&attr);
+
+ ofmt_print(state->is_ofmt, &ibuf);
+
+ return (DLADM_STATUS_OK);
+}
+
+/*
+ * Datalink walker callback for 'show-ib': prints one IB Phys link via
+ * print_ib(), stores the result in is_status and keeps the walk going.
+ */
+/* ARGSUSED */
+static int
+show_ib(dladm_handle_t dh, datalink_id_t linkid, void *arg)
+{
+ show_ib_state_t *state = arg;
+
+ state->is_status = print_ib(state, linkid);
+ return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * Show the properties of one/all IB Phys links. This is different from
+ * show-phys command since this will display IB specific information about the
+ * Phys link like, HCA GUID, PORT GUID, PKEYS active for this port etc.
+ *
+ * Handles 'dladm show-ib [-p] [-o <field>,...] [<link>]'. Without <link>
+ * every Phys link of media type IB is walked; with it only that link is
+ * shown and a failure is fatal.
+ */
+static void
+do_show_ib(int argc, char *argv[], const char *use)
+{
+ int option;
+ uint32_t flags = DLADM_OPT_ACTIVE;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ show_ib_state_t state;
+ dladm_status_t status;
+ boolean_t o_arg = B_FALSE;
+ char *fields_str = NULL;
+ ofmt_handle_t ofmt;
+ ofmt_status_t oferr;
+ uint_t ofmtflags = 0;
+
+ bzero(&state, sizeof (state));
+ opterr = 0;
+ while ((option = getopt_long(argc, argv, ":po:", show_lopts,
+ NULL)) != -1) {
+ switch (option) {
+ case 'p':
+ state.is_parsable = B_TRUE;
+ break;
+ case 'o':
+ o_arg = B_TRUE;
+ fields_str = optarg;
+ break;
+ default:
+ die_opterr(optopt, option, use);
+ }
+ }
+
+ /* get IB Phys link ID (optional last argument) */
+ if (optind == (argc - 1)) {
+ status = dladm_name2info(handle, argv[optind], &linkid, NULL,
+ NULL, NULL);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "invalid IB port name '%s'",
+ argv[optind]);
+ }
+ (void) strlcpy(state.is_link, argv[optind], MAXLINKNAMELEN);
+ } else if (optind != argc) {
+ usage();
+ }
+
+ if (state.is_parsable && !o_arg)
+ die("-p requires -o");
+
+ /*
+ * linkid is the data link ID of the IB Phys link. By default it will
+ * be DATALINK_ALL_LINKID.
+ */
+ state.is_link_id = linkid;
+ state.is_flags = flags;
+
+ if (state.is_parsable)
+ ofmtflags |= OFMT_PARSABLE;
+ oferr = ofmt_open(fields_str, ib_fields, ofmtflags, 0, &ofmt);
+ dladm_ofmt_check(oferr, state.is_parsable, ofmt);
+ state.is_ofmt = ofmt;
+
+ /*
+ * If we are going to display the information for all IB Phys links
+ * then we'll walk through all the datalinks for datalinks of Phys
+ * class and media type IB.
+ */
+ if (linkid == DATALINK_ALL_LINKID) {
+ (void) dladm_walk_datalink_id(show_ib, handle, &state,
+ DATALINK_CLASS_PHYS, DL_IB, flags);
+ } else {
+ /*
+ * We need to display the information only for the IB phys link
+ * linkid. Call show_ib for this link; its status is fatal.
+ */
+ (void) show_ib(handle, linkid, &state);
+ if (state.is_status != DLADM_STATUS_OK) {
+ ofmt_close(ofmt);
+ die_dlerr(state.is_status, "failed to show IB Phys link"
+ " '%s'", state.is_link);
+ }
+ }
+ ofmt_close(ofmt);
+}
+
+/*
+ * Create an IP over Infiniband partition object over an IB Phys link. The IB
+ * Phys link is associated with an Infiniband HCA port. The IB partition object
+ * is created over a port, pkey combination. This partition object represents
+ * an instance of IP over IB interface.
+ *
+ * Options handled here:
+ *	-t		create the partition in the active configuration only
+ *	-f		set DLADM_OPT_FORCE
+ *	-l link		IB Phys link to create the partition over (required)
+ *	-P pkey		hexadecimal P_Key, 1 .. USHRT_MAX (required)
+ *	-p props	link properties; may be given more than once
+ *	-R root		alternate root, handed to altroot_cmd()
+ *
+ * The single operand is the name of the new partition link. Every failure is
+ * reported through die(), die_dlerr() or usage().
+ */
+static void
+do_create_part(int argc, char *argv[], const char *use)
+{
+	int			option;
+	dladm_status_t		status;
+	int			flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST;
+	char			*pname;
+	char			*l_arg = NULL;
+	char			*altroot = NULL;
+	datalink_id_t		physlinkid = 0;
+	datalink_id_t		partlinkid = 0;
+	unsigned long		pkey_val;
+	ib_pkey_t		pkey = 0;
+	char			*endp = NULL;
+	char			propstr[DLADM_STRSIZE];
+	dladm_arg_list_t	*proplist = NULL;
+
+	propstr[0] = '\0';
+	while ((option = getopt_long(argc, argv, ":tfl:P:R:p:",
+	    part_lopts, NULL)) != -1) {
+		switch (option) {
+		case 't':
+			/*
+			 * Create a temporary IB partition object. This
+			 * instance is not entered into the persistent database
+			 * so it will not be recreated automatically on a
+			 * reboot.
+			 */
+			flags &= ~DLADM_OPT_PERSIST;
+			break;
+		case 'l':
+			/*
+			 * The IB phys link over which the partition object will
+			 * be created.
+			 */
+			l_arg = optarg;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		case 'p':
+			/*
+			 * Accumulate "prop=value," pieces. Only the second
+			 * strlcat() is checked: if the first one truncated,
+			 * the second one necessarily reports overflow too.
+			 */
+			(void) strlcat(propstr, optarg, DLADM_STRSIZE);
+			if (strlcat(propstr, ",", DLADM_STRSIZE) >=
+			    DLADM_STRSIZE)
+				die("property list too long '%s'", propstr);
+			break;
+		case 'P':
+			/*
+			 * The P_Key for the port, pkey tuple of the partition
+			 * object. This P_Key should exist in the IB subnet.
+			 * The partition creation for a non-existent P_Key will
+			 * fail unless the -f option is used.
+			 *
+			 * The P_Key is expected to be a hexadecimal number.
+			 * It is parsed into a wide temporary so the range
+			 * check sees the full value before it is narrowed to
+			 * ib_pkey_t, and errno is cleared first so that a
+			 * stale ERANGE cannot reject a valid key.
+			 */
+			errno = 0;
+			pkey_val = strtoul(optarg, &endp, 16);
+			if (errno == ERANGE || endp == optarg ||
+			    *endp != '\0' || pkey_val > USHRT_MAX)
+				die("Invalid pkey");
+			pkey = (ib_pkey_t)pkey_val;
+			break;
+		case 'f':
+			flags |= DLADM_OPT_FORCE;
+			break;
+		default:
+			die_opterr(optopt, option, use);
+			break;
+		}
+	}
+
+	/* check required options */
+	if (!l_arg)
+		usage();
+
+	/* the partition name is a required operand */
+	if (optind != (argc - 1))
+		usage();
+
+	pname = argv[argc - 1];
+
+	/*
+	 * Verify that the partition object's name is in the valid link name
+	 * format.
+	 */
+	if (!dladm_valid_linkname(pname))
+		die("Invalid link name '%s'", pname);
+
+	/* pkey is a mandatory argument; zero means -P was never given */
+	if (pkey == 0)
+		usage();
+
+	if (altroot != NULL)
+		altroot_cmd(altroot, argc, argv);
+
+	/*
+	 * Get the data link id of the IB Phys link over which we will be
+	 * creating partition object.
+	 */
+	if (dladm_name2info(handle, l_arg,
+	    &physlinkid, NULL, NULL, NULL) != DLADM_STATUS_OK)
+		die("invalid link name '%s'", l_arg);
+
+	/*
+	 * parse the property list provided with -p option.
+	 */
+	if (dladm_parse_link_props(propstr, &proplist, B_FALSE)
+	    != DLADM_STATUS_OK)
+		die("invalid IB partition property");
+
+	/*
+	 * Call the library routine to create the partition object.
+	 */
+	status = dladm_part_create(handle, physlinkid, pkey, flags, pname,
+	    &partlinkid, proplist);
+	if (status != DLADM_STATUS_OK)
+		die_dlerr(status,
+		    "partition %x creation over %s failed", pkey, l_arg);
+}
+
+/*
+ * Delete an IP over Infiniband partition object. The partition object should
+ * be unplumbed before attempting the delete.
+ *
+ * Options handled here:
+ *	-t		delete from the active configuration only
+ *	-R root		alternate root, handed to altroot_cmd()
+ *
+ * The single operand is the name of the partition link to delete.
+ */
+static void
+do_delete_part(int argc, char *argv[], const char *use)
+{
+	int		option, flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST;
+	dladm_status_t	status;
+	char		*altroot = NULL;
+	datalink_id_t	partid;
+
+	opterr = 0;
+	/*
+	 * The leading ':' makes getopt report a missing option argument as
+	 * ':' rather than '?', matching the other IB sub-commands, so that
+	 * die_opterr() is told which of the two failures occurred.
+	 */
+	while ((option = getopt_long(argc, argv, ":R:t", part_lopts,
+	    NULL)) != -1) {
+		switch (option) {
+		case 't':
+			flags &= ~DLADM_OPT_PERSIST;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		default:
+			die_opterr(optopt, option, use);
+		}
+	}
+
+	/* get partition name (required last argument) */
+	if (optind != (argc - 1))
+		usage();
+
+	if (altroot != NULL)
+		altroot_cmd(altroot, argc, argv);
+
+	/*
+	 * Get the data link id of the partition object given the partition
+	 * name.
+	 */
+	status = dladm_name2info(handle, argv[optind], &partid, NULL, NULL,
+	    NULL);
+	if (status != DLADM_STATUS_OK)
+		die("invalid link name '%s'", argv[optind]);
+
+	/*
+	 * Call the library routine to delete the IB partition. This will
+	 * result in the IB partition object and all its resources getting
+	 * deleted.
+	 */
+	status = dladm_part_delete(handle, partid, flags);
+	if (status != DLADM_STATUS_OK)
+		die_dlerr(status, "%s: partition deletion failed",
+		    argv[optind]);
+}
+
+/*
+ * Bring up one IB partition, or all of them, from the persistent database.
+ *
+ * This sub-command is used during system boot to activate every IB partition
+ * recorded in the persistent database. It behaves like a partition create,
+ * except that the partitions are always created even if the HCA port is down
+ * or the P_Key is not present in the IB subnet. That resembles the 'force'
+ * option of create-part, but the 'f' flag is only set in the flags field when
+ * the original create-part was itself invoked with '-f'.
+ *
+ * With no operand every partition is brought up; with one operand only the
+ * named partition is. A name that cannot be resolved is silently ignored, and
+ * the result of dladm_part_up() is not reported.
+ */
+/* ARGSUSED */
+static void
+do_up_part(int argc, char *argv[], const char *use)
+{
+	datalink_id_t	partid = DATALINK_ALL_LINKID;
+
+	/* More than one operand is a usage error. */
+	if (argc > 2)
+		usage();
+
+	/* Exactly one operand: translate the partition name to its link id. */
+	if (argc == 2 &&
+	    dladm_name2info(handle, argv[1], &partid, NULL, NULL, NULL) !=
+	    DLADM_STATUS_OK)
+		return;
+
+	(void) dladm_part_up(handle, partid, 0);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,61 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+# SMF manifest and start-method script delivered by this directory.
+MANIFEST = ibd-post-upgrade.xml
+SVCMETHOD = ibd-post-upgrade
+
+include ../Makefile.cmd
+
+# ibd_upgrade is a shell script; ibd_delete_link is a C program whose single
+# object and source file names are derived from IBD_DELETE_LINK below.
+SHFILES = ibd_upgrade
+IBD_DELETE_LINK = ibd_delete_link
+OBJS = $(IBD_DELETE_LINK).o
+SRCS = $(OBJS:%.o=%.c)
+
+# Generated files, including the objects, are listed for removal here; the
+# "clean" target below has no recipe of its own.
+CLOBBERFILES = $(SHFILES) $(IBD_DELETE_LINK) $(OBJS)
+
+ROOTMANIFESTDIR = $(ROOTSVCNETWORK)
+# ibd_delete_link calls into libdladm.
+LDLIBS += -ldladm
+
+.KEEP_STATE:
+
+all: $(IBD_DELETE_LINK) $(SHFILES)
+
+# Both programs are installed under $(ROOTSBIN) via the pattern rule at the
+# end of this file.
+install: all \
+ $(ROOTSBIN)/$(IBD_DELETE_LINK) \
+ $(ROOTMANIFEST) \
+ $(ROOTSVCMETHOD) \
+ $(ROOTSBIN)/ibd_upgrade
+
+# Besides the manifest check, run cstyle over the C source.
+check: $(CHKMANIFEST)
+ $(CSTYLE) -pP $(SRCS)
+
+clean:
+
+lint: lint_SRCS
+
+include ../Makefile.targ
+
+# Install any locally built file of the same name into $(ROOTSBIN).
+$(ROOTSBIN)/%: %
+ $(INS.file)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd-post-upgrade Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,47 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+# Provides smf_is_globalzone and the SMF_EXIT_* codes used below.
+. /lib/svc/share/smf_include.sh
+
+#
+# The actual work of the ibd upgrade is done in the network/physical service.
+# Here we merely set the property that indicates the ibd upgrade has been
+# done. The property cannot be set from the network/physical service itself
+# because the file system is read-only at that point.
+#
+if smf_is_globalzone; then
+ NETPHYS=svc:/network/physical:default
+ PROP=ibd/ibd_upgraded
+
+ # Empty when the property (or its property group) does not exist yet;
+ # svcprop's error output is discarded.
+ upgrade_done=`/bin/svcprop -c -p $PROP $NETPHYS 2> /dev/null`
+
+ if [ "$upgrade_done" != "true" ]; then
+ # addpg errors are discarded (presumably because the "ibd"
+ # group may already exist); setprop then records the flag.
+ /usr/sbin/svccfg -s $NETPHYS addpg ibd system 2> /dev/null
+ /usr/sbin/svccfg -s $NETPHYS setprop $PROP = boolean: true
+ fi
+fi
+
+# Disable the service named by $SMF_FMRI (presumably this instance, so that it
+# does not run again), then report success.
+/usr/sbin/svcadm disable $SMF_FMRI
+exit $SMF_EXIT_OK
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd-post-upgrade.xml Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+ Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ NOTE: This service manifest is not editable; its contents will
+ be overwritten by package or patch operations, including
+ operating system upgrade. Make customizations in a different
+ file.
+-->
+
+<service_bundle type='manifest' name='SUNWipoib:ibd-post-upgrade'>
+
+<service
+ name='network/ibd-post-upgrade'
+ type='service'
+ version='1'>
+
+ <create_default_instance enabled='true' />
+
+ <single_instance />
+
+ <dependency
+ name='network-physical'
+ type='service'
+ grouping='require_all'
+ restart_on='none'>
+ <service_fmri value='svc:/network/physical' />
+ </dependency>
+
+ <dependency
+ name='filesystem-minimal'
+ type='service'
+ grouping='require_all'
+ restart_on='none'>
+ <service_fmri value='svc:/system/filesystem/minimal' />
+ </dependency>
+
+ <exec_method
+ type='method'
+ name='start'
+ exec='/lib/svc/method/ibd-post-upgrade'
+ timeout_seconds='0' />
+
+ <exec_method
+ type='method'
+ name='stop'
+ exec=':true'
+ timeout_seconds='0' />
+
+ <property_group name='startd' type='framework'>
+ <propval name='duration' type='astring' value='transient' />
+ </property_group>
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang='C'>
+ ibd upgrade
+ </loctext>
+ </common_name>
+
+ <documentation>
+ <manpage
+ title='ibp'
+ section='7D'
+ manpath='/usr/share/man' />
+ </documentation>
+ </template>
+</service>
+
+</service_bundle>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <door.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mman.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+
+extern dladm_status_t dladm_door_fd(dladm_handle_t, int *);
+
+/*
+ * Issue one door call on the door descriptor obtained from dladm_door_fd().
+ *
+ * arg/asize describe the request; rbuf/rsize describe the caller's reply
+ * buffer. The reply must land in rbuf and be exactly rsize bytes long, and
+ * its lr_err field must be zero; otherwise a failure status is returned.
+ */
+static dladm_status_t
+ibd_dladm_door_call(dladm_handle_t handle, void *arg, size_t asize, void *rbuf,
+    size_t rsize)
+{
+	door_arg_t	door_args;
+	dladm_status_t	status;
+	int		fd;
+
+	/* The door descriptor is opened if it isn't already */
+	status = dladm_door_fd(handle, &fd);
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	door_args.data_ptr = arg;
+	door_args.data_size = asize;
+	door_args.desc_ptr = NULL;
+	door_args.desc_num = 0;
+	door_args.rbuf = rbuf;
+	door_args.rsize = rsize;
+
+	if (door_call(fd, &door_args) == -1)
+		return (DLADM_STATUS_FAILED);
+
+	if (door_args.rbuf != rbuf) {
+		/*
+		 * The supplied reply buffer was too small, so the door
+		 * allocated a buffer of its own. Release that buffer and
+		 * treat the call as having failed.
+		 */
+		(void) munmap(door_args.rbuf, door_args.rsize);
+		return (DLADM_STATUS_TOOSMALL);
+	}
+
+	/* A reply of any other size than the one expected is an error. */
+	if (door_args.rsize != rsize)
+		return (DLADM_STATUS_FAILED);
+
+	return (((dlmgmt_retval_t *)rbuf)->lr_err == 0 ?
+	    DLADM_STATUS_OK : DLADM_STATUS_FAILED);
+}
+
+/*
+ * Remove the named physical datalink: look up its link id and class through
+ * the link management door, drop its configuration with dladm_remove_conf(),
+ * then destroy the datalink id in both the active and persistent
+ * configurations. A diagnostic is written to stderr for every failure.
+ *
+ * Returns the dladm status of the step that ended the function.
+ */
+static int
+ibd_delete_link(dladm_handle_t dlh, char *link)
+{
+	dlmgmt_door_getlinkid_t		req;
+	dlmgmt_getlinkid_retval_t	reply;
+	dladm_status_t			status;
+	char				errmsg[DLADM_STRSIZE];
+
+	req.ld_cmd = DLMGMT_CMD_GETLINKID;
+	(void) strlcpy(req.ld_link, link, MAXLINKNAMELEN);
+
+	status = ibd_dladm_door_call(dlh, &req, sizeof (req), &reply,
+	    sizeof (reply));
+	if (status != DLADM_STATUS_OK) {
+		(void) fprintf(stderr,
+		    "dladm_door_call failed: %s; linkname = %s\n",
+		    dladm_status2str(status, errmsg), link);
+		return (status);
+	}
+
+	if (reply.lr_class != DATALINK_CLASS_PHYS) {
+		(void) fprintf(stderr,
+		    "Not a physical link: linkname = %s, class = 0x%x\n",
+		    link, (uint_t)reply.lr_class);
+		/*
+		 * NOTE(review): status is still DLADM_STATUS_OK here, so the
+		 * caller sees success even though nothing was deleted.
+		 * Confirm that this is intended.
+		 */
+		return (status);
+	}
+
+	status = dladm_remove_conf(dlh, reply.lr_linkid);
+	if (status != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "dladm_remove_conf failed: %s\n",
+		    dladm_status2str(status, errmsg));
+		return (status);
+	}
+
+	status = dladm_destroy_datalink_id(dlh, reply.lr_linkid,
+	    DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST);
+	if (status != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "dladm_destroy_datalink_id failed: %s\n",
+		    dladm_status2str(status, errmsg));
+	}
+
+	return (status);
+}
+
+/*
+ * ibd_delete_link linkname ...
+ *
+ * Delete each named link in turn, stopping at the first one that fails.
+ * Exit status: 0 on success, 1 on failure, 2 when no link name was given.
+ */
+int
+main(int argc, char *argv[])
+{
+	dladm_handle_t	dlh;
+	dladm_status_t	status;
+	char		errmsg[DLADM_STRSIZE];
+	int		rc = 0;
+	int		idx;
+
+	if (argc < 2) {
+		(void) fprintf(stderr,
+		    "Usage: ibd_delete_link linkname ...\n");
+		return (2);
+	}
+
+	status = dladm_open(&dlh);
+	if (status != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "Failed to open dladm handle: %s\n",
+		    dladm_status2str(status, errmsg));
+		return (1);
+	}
+
+	/* The loop ends as soon as one deletion reports a failure. */
+	for (idx = 1; idx < argc && rc == 0; idx++) {
+		if (ibd_delete_link(dlh, argv[idx]) != DLADM_STATUS_OK)
+			rc = 1;
+	}
+
+	dladm_close(dlh);
+	return (rc);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd_upgrade.sh Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,159 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+PATH=/sbin:/bin
+ORIGIFS="${IFS}"
+USAGE="Usage: ibd_upgrade [-v]"
+DRVCONF=/kernel/drv/ibp.conf.old
+
+#
+# split device path into path components
+#
+# Input:  the global $device_path, whose last component has the form
+#         node_name@port,pkey,service:partition_name
+# Output: the globals hca_path, node_name, port, pkey (given a "0x" prefix),
+#         service and partition_name. bname and node_at_addr are set as
+#         scratch variables, and the positional parameters are overwritten.
+#
+split_path_components()
+{
+ hca_path=
+ node_name=
+ port=
+ pkey=
+ service=
+ partition_name=
+
+ hca_path="/dev/`dirname $device_path`"
+ bname=`basename $device_path`
+ # "node@addr" : "partition_name"
+ IFS=":"
+ set -- $bname
+ node_at_addr=$1
+ partition_name=$2
+ # "node_name" @ "port,pkey,service"
+ IFS="@"
+ set -- $node_at_addr
+ node_name=$1
+ # $2 is the address part; split it on commas
+ IFS=","
+ set -- $2
+ port=$1
+ pkey=0x$2
+ service=$3
+
+ # put the field separator back before returning
+ IFS="${ORIGIFS}"
+}
+
+do_cmd()
+{
+ if [ $verbose -eq 1 ]; then
+ echo "$1"
+ fi
+ $1
+}
+
+process_rc_mode()
+{
+ device=$1
+
+ #
+ # Get the instance number of ibd
+ # Device name format would be ibd#,
+ #
+ IFS="d"
+ set -- ${device}
+ IFS="${ORIGIFS}"
+
+ if [ "$1" != "ib" ]; then
+ return
+ fi
+
+ inst=$2
+
+ IFS=","
+ set -- ${enable_rc}
+ IFS="${ORIGIFS}"
+
+ if [ ${inst} -lt $# ]; then
+ (( inst = $inst + 1 ))
+ eval "linkmode=\$${inst}"
+ else
+ linkmode=0
+ fi
+
+ if [ "$linkmode" = "0" ]; then
+ do_cmd "dladm set-linkprop -p linkmode=ud ${device}"
+ fi
+}
+
+# -v turns on echoing of every command run through do_cmd.
+verbose=0
+while getopts v c
+do
+ case $c in
+ v) verbose=1;;
+ \?) echo "$USAGE" 1>&2
+ exit 2;;
+ esac
+done
+
+# Pull the value of enable_rc out of the saved driver configuration file: the
+# sed expressions delete the bracketed whitespace characters, the
+# "enable_rc=" prefix and a trailing ";", leaving the bare value list.
+enable_rc=
+if [ -f ${DRVCONF} ]; then
+ enable_rc=`egrep "^[ ]*enable_rc[ ]*=" ${DRVCONF} | sed -e "s/[ ]*//g" -e "s/enable_rc=//" -e "s/;$//" 2>/dev/null`
+fi
+
+#
+# Loop through all ibd devices based on the old model (i.e., one ibd instance
+# per partition; consequently device names have non zero pkey)
+# and create data links with the same names as in the old model under the
+# new model.
+#
+# Only the eleventh field of each "ls -l" line is used; it is read into
+# device_path (presumably the symbolic link target of the /dev entry).
+#
+ls -l /dev/ibd* 2> /dev/null \
+ | while read x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 device_path
+do
+ split_path_components
+
+ # skip anything that is not an ipib ibport node with a non zero pkey
+ if [ "$node_name" != "ibport" -o "$service" != "ipib" \
+ -o "$pkey" = "0x0" -o "$pkey" = "0x" ]; then
+ continue
+ fi
+
+ # verify that the hca path exists
+ cd $hca_path 2> /dev/null
+ if [ $? -ne 0 ]; then
+ continue
+ fi
+
+ # Look, relative to the hca directory entered above, for the pkey 0
+ # node of the same port with an ibp<N> minor name.
+ fn=`echo ibport@${port},0,ipib:ibp*[0-9]`
+ if [ -c "$fn" ]; then
+ # after the split on ":", $2 is the ibp<N> minor name
+ IFS=":"
+ set -- $fn
+ IFS="${ORIGIFS}"
+
+ # Remove the old-model link, falling back to ibd_delete_link
+ # when dladm cannot do it, then recreate the same name as a
+ # partition over the ibp<N> link.
+ do_cmd "dladm delete-phys $partition_name" 2>/dev/null
+ if [ $? -ne 0 ]; then
+ do_cmd "ibd_delete_link $partition_name"
+ fi
+ do_cmd "dladm create-part -f -l $2 -P $pkey $partition_name"
+
+ if [ "$enable_rc" != "" ]; then
+ process_rc_mode $partition_name
+ fi
+ fi
+done
+
+exit 0
--- a/usr/src/cmd/rcm_daemon/Makefile.com Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/rcm_daemon/Makefile.com Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
#
include ../../Makefile.cmd
@@ -50,6 +49,7 @@
$(COMMON)/network_rcm.c \
$(COMMON)/vlan_rcm.c \
$(COMMON)/vnic_rcm.c \
+ $(COMMON)/ibpart_rcm.c \
$(COMMON)/aggr_rcm.c \
$(COMMON)/ip_rcm.c \
$(COMMON)/cluster_rcm.c \
@@ -74,6 +74,7 @@
network_rcm.o \
vlan_rcm.o \
vnic_rcm.o \
+ ibpart_rcm.o \
aggr_rcm.o \
ip_rcm.o \
cluster_rcm.o \
@@ -94,6 +95,7 @@
SUNW_network_rcm.so \
SUNW_vlan_rcm.so \
SUNW_vnic_rcm.so \
+ SUNW_ibpart_rcm.so \
SUNW_aggr_rcm.so \
SUNW_ip_rcm.so \
SUNW_cluster_rcm.so \
@@ -128,6 +130,7 @@
SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
+SUNW_ibpart_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_aggr_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm -lipmp -lipadm
SUNW_ip_anon_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/rcm_daemon/common/ibpart_rcm.c Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,1368 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This RCM module adds support to the RCM framework for IBPART links
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <synch.h>
+#include <assert.h>
+#include <strings.h>
+#include "rcm_module.h"
+#include <libintl.h>
+#include <libdllink.h>
+#include <libdlib.h>
+#include <libdlpi.h>
+
+/*
+ * Definitions
+ */
+#ifndef lint
+#define _(x) gettext(x)
+#else
+#define _(x) x
+#endif
+
+/* Some generic well-knowns and defaults used in this module */
+#define RCM_LINK_PREFIX "SUNW_datalink" /* RCM datalink name prefix */
+#define RCM_LINK_RESOURCE_MAX (13 + LINKID_STR_WIDTH)
+
+/*
+ * IBPART link flags, kept in dl_ibpart_t.dlib_flags.
+ *
+ * IBPART_OFFLINED is set by ibpart_offline_ibpart() once the partition has
+ * been deleted from the active configuration, and cleared by
+ * ibpart_online_ibpart() once dladm_part_up() succeeds for it.
+ * NOTE(review): IBPART_CONSUMER_OFFLINED and IBPART_STALE are used further
+ * down the file; presumably they track consumer notification and cache
+ * refresh state - confirm there.
+ */
+typedef enum {
+ IBPART_OFFLINED = 0x1,
+ IBPART_CONSUMER_OFFLINED = 0x2,
+ IBPART_STALE = 0x4
+} ibpart_flag_t;
+
+/*
+ * link representation: one IBPART configured over a cached link. The entries
+ * form a doubly linked list whose head is link_cache_t.pc_ibpart.
+ */
+typedef struct dl_ibpart {
+ struct dl_ibpart *dlib_next; /* next IBPART on this link */
+ struct dl_ibpart *dlib_prev; /* prev IBPART on this link */
+ /* datalink id handed to dladm_part_up() and dladm_part_delete() */
+ datalink_id_t dlib_ibpart_id;
+ ibpart_flag_t dlib_flags; /* IBPART link flags */
+} dl_ibpart_t;
+
+/*
+ * IBPART Cache state flags, kept in link_cache_t.pc_state.
+ *
+ * CACHE_NODE_OFFLINED is set by ibpart_offline() through
+ * ibpart_offline_ibpart() and cleared again by ibpart_undo_offline().
+ */
+typedef enum {
+ CACHE_NODE_STALE = 0x1, /* stale cached data */
+ CACHE_NODE_NEW = 0x2, /* new cached nodes */
+ CACHE_NODE_OFFLINED = 0x4 /* nodes offlined */
+} cache_node_state_t;
+
+/* Network Cache lookup options */
+#define CACHE_NO_REFRESH 0x1 /* cache refresh not needed */
+#define CACHE_REFRESH 0x2 /* refresh cache */
+
+/*
+ * Cache element: one entry per cached link resource. Entries are chained
+ * between the cache_head and cache_tail sentinels.
+ */
+typedef struct link_cache {
+ struct link_cache *pc_next; /* next cached resource */
+ struct link_cache *pc_prev; /* prev cached resource */
+ char *pc_resource; /* resource name */
+ datalink_id_t pc_linkid; /* linkid */
+ dl_ibpart_t *pc_ibpart; /* IBPART list on this link */
+ cache_node_state_t pc_state; /* cache state flags */
+} link_cache_t;
+
+/*
+ * Global cache for network IBPARTs
+ */
+static link_cache_t cache_head;
+static link_cache_t cache_tail;
+static mutex_t cache_lock;
+static int events_registered = 0;
+
+static dladm_handle_t dld_handle = NULL;
+
+/*
+ * RCM module interface prototypes
+ */
+static int ibpart_register(rcm_handle_t *);
+static int ibpart_unregister(rcm_handle_t *);
+static int ibpart_get_info(rcm_handle_t *, char *, id_t, uint_t,
+ char **, char **, nvlist_t *, rcm_info_t **);
+static int ibpart_suspend(rcm_handle_t *, char *, id_t,
+ timespec_t *, uint_t, char **, rcm_info_t **);
+static int ibpart_resume(rcm_handle_t *, char *, id_t, uint_t,
+ char **, rcm_info_t **);
+static int ibpart_offline(rcm_handle_t *, char *, id_t, uint_t,
+ char **, rcm_info_t **);
+static int ibpart_undo_offline(rcm_handle_t *, char *, id_t,
+ uint_t, char **, rcm_info_t **);
+static int ibpart_remove(rcm_handle_t *, char *, id_t, uint_t,
+ char **, rcm_info_t **);
+static int ibpart_notify_event(rcm_handle_t *, char *, id_t,
+ uint_t, char **, nvlist_t *, rcm_info_t **);
+static int ibpart_configure(rcm_handle_t *, datalink_id_t);
+
+/* Module private routines */
+static void cache_free();
+static int cache_update(rcm_handle_t *);
+static void cache_remove(link_cache_t *);
+static void node_free(link_cache_t *);
+static void cache_insert(link_cache_t *);
+static link_cache_t *cache_lookup(rcm_handle_t *, char *, char);
+static int ibpart_consumer_offline(rcm_handle_t *, link_cache_t *,
+ char **, uint_t, rcm_info_t **);
+static void ibpart_consumer_online(rcm_handle_t *, link_cache_t *,
+ char **, uint_t, rcm_info_t **);
+static int ibpart_offline_ibpart(link_cache_t *, uint32_t,
+ cache_node_state_t);
+static void ibpart_online_ibpart(link_cache_t *);
+static char *ibpart_usage(link_cache_t *);
+static void ibpart_log_err(datalink_id_t, char **, char *);
+static int ibpart_consumer_notify(rcm_handle_t *, datalink_id_t,
+ char **, uint_t, rcm_info_t **);
+
+/*
+ * Module-Private data: the ops vector that rcm_mod_init() returns to the RCM
+ * framework. The initializers are positional, so their order must match the
+ * member order of struct rcm_mod_ops.
+ */
+static struct rcm_mod_ops ibpart_ops =
+{
+ RCM_MOD_OPS_VERSION,
+ ibpart_register,
+ ibpart_unregister,
+ ibpart_get_info,
+ ibpart_suspend,
+ ibpart_resume,
+ ibpart_offline,
+ ibpart_undo_offline,
+ ibpart_remove,
+ /*
+ * NOTE(review): two entry points are left unset; presumably these are
+ * the capacity change callbacks - confirm against rcm_module.h.
+ */
+ NULL,
+ NULL,
+ ibpart_notify_event
+};
+
+/*
+ * rcm_mod_init() - Set up the empty link cache and its lock, open the
+ * libdladm handle, and return the ops structure. NULL is returned when the
+ * libdladm handle cannot be opened.
+ */
+struct rcm_mod_ops *
+rcm_mod_init(void)
+{
+	dladm_status_t	status;
+	char		errmsg[DLADM_STRSIZE];
+
+	rcm_log_message(RCM_TRACE1, "IBPART: mod_init\n");
+
+	/* An empty cache is just the two sentinels linked to each other. */
+	cache_head.pc_prev = NULL;
+	cache_head.pc_next = &cache_tail;
+	cache_tail.pc_prev = &cache_head;
+	cache_tail.pc_next = NULL;
+	(void) mutex_init(&cache_lock, 0, NULL);
+
+	status = dladm_open(&dld_handle);
+	if (status == DLADM_STATUS_OK) {
+		/* Return the ops vectors */
+		return (&ibpart_ops);
+	}
+
+	rcm_log_message(RCM_WARNING,
+	    "IBPART: mod_init failed: cannot open datalink "
+	    "handle: %s\n", dladm_status2str(status, errmsg));
+	return (NULL);
+}
+
+/*
+ * rcm_mod_info() - Log the call and return a short, constant description of
+ * this module.
+ */
+const char *
+rcm_mod_info(void)
+{
+	const char *description = "IBPART module";
+
+	rcm_log_message(RCM_TRACE1, "IBPART: mod_info\n");
+
+	return (description);
+}
+
+/*
+ * rcm_mod_fini() - Destroy the network IBPART cache, destroy its lock and
+ * close the libdladm handle. Always returns RCM_SUCCESS.
+ */
+int
+rcm_mod_fini(void)
+{
+ rcm_log_message(RCM_TRACE1, "IBPART: mod_fini\n");
+
+ /*
+ * Note that ibpart_unregister() does not seem to be called anywhere,
+ * therefore we free the cache nodes here. In theory we should call
+ * rcm_unregister_interest() for each node before we free it, but the
+ * framework does not provide the rcm_handle to allow us to do so.
+ */
+ cache_free();
+ (void) mutex_destroy(&cache_lock);
+
+ dladm_close(dld_handle);
+ return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_register() - Bring the cache up to date and, the first time
+ * through, register for RCM_RESOURCE_LINK_NEW events.
+ *
+ * Returns RCM_SUCCESS, or RCM_FAILURE when the cache update or the event
+ * registration fails.
+ */
+static int
+ibpart_register(rcm_handle_t *hd)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: register\n");
+
+	if (cache_update(hd) < 0)
+		return (RCM_FAILURE);
+
+	/* The attach event only has to be registered once. */
+	if (events_registered)
+		return (RCM_SUCCESS);
+
+	/*
+	 * Register interest in all new resources getting attached, so that
+	 * we receive attach event notifications.
+	 */
+	if (rcm_register_event(hd, RCM_RESOURCE_LINK_NEW, 0, NULL) !=
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to register %s\n"),
+		    RCM_RESOURCE_LINK_NEW);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_DEBUG, "IBPART: registered %s\n",
+	    RCM_RESOURCE_LINK_NEW);
+	events_registered++;
+
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_unregister() - Unregister and free every cached link, then drop the
+ * RCM_RESOURCE_LINK_NEW event registration if one is held.
+ *
+ * Returns RCM_FAILURE as soon as any unregistration fails; the nodes not yet
+ * processed stay in the cache in that case.
+ */
+static int
+ibpart_unregister(rcm_handle_t *hd)
+{
+	link_cache_t	*cur;
+	int		rv;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: unregister\n");
+
+	/* Keep taking the first node until only the sentinels remain. */
+	(void) mutex_lock(&cache_lock);
+	while ((cur = cache_head.pc_next) != &cache_tail) {
+		rv = rcm_unregister_interest(hd, cur->pc_resource, 0);
+		if (rv != RCM_SUCCESS) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: failed to unregister %s\n"),
+			    cur->pc_resource);
+			(void) mutex_unlock(&cache_lock);
+			return (RCM_FAILURE);
+		}
+		cache_remove(cur);
+		node_free(cur);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	/* Nothing more to do unless the attach event was registered. */
+	if (!events_registered)
+		return (RCM_SUCCESS);
+
+	if (rcm_unregister_event(hd, RCM_RESOURCE_LINK_NEW, 0) !=
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to unregister %s\n"),
+		    RCM_RESOURCE_LINK_NEW);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_DEBUG, "IBPART: unregistered %s\n",
+	    RCM_RESOURCE_LINK_NEW);
+	events_registered--;
+
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_offline() - Offline IBPARTs on a specific node.
+ *
+ * The consumers of the IBPARTs on the link are asked to offline first. For
+ * an RCM_QUERY request nothing further is done; otherwise the IBPARTs are
+ * deleted from the active configuration, and brought back up again if any
+ * of those deletions fails.
+ *
+ * Returns RCM_SUCCESS or RCM_FAILURE. A resource that is not in the cache is
+ * logged and reported as success.
+ */
+/*ARGSUSED*/
+static int
+ibpart_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t *node;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: offline(%s)\n", rsrc);
+
+	/* Lock the cache and lookup the resource */
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(hd, rsrc, CACHE_REFRESH);
+	if (node == NULL) {
+		/*
+		 * Should not happen because the resource is registered.
+		 * There is no node to take a link id from here, so the
+		 * error is logged against DATALINK_INVALID_LINKID instead
+		 * of dereferencing the NULL pointer.
+		 */
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "unrecognized resource");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_SUCCESS);
+	}
+
+	/*
+	 * Inform consumers (IP interfaces) of associated IBPARTs to be offlined
+	 */
+	if (ibpart_consumer_offline(hd, node, errorp, flags, info) ==
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: consumers agreed on offline\n");
+	} else {
+		ibpart_log_err(node->pc_linkid, errorp,
+		    "consumers failed to offline");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_FAILURE);
+	}
+
+	/* Check if it's a query */
+	if (flags & RCM_QUERY) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: offline query succeeded(%s)\n", rsrc);
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_SUCCESS);
+	}
+
+	if (ibpart_offline_ibpart(node, IBPART_OFFLINED, CACHE_NODE_OFFLINED) !=
+	    RCM_SUCCESS) {
+		/* Undo the partial offline before reporting the failure. */
+		ibpart_online_ibpart(node);
+		ibpart_log_err(node->pc_linkid, errorp, "offline failed");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_TRACE1, "IBPART: Offline succeeded(%s)\n", rsrc);
+	(void) mutex_unlock(&cache_lock);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_undo_offline() - Bring a previously offlined node back online.
+ *
+ * Returns RCM_FAILURE with errno set to ENOENT when the resource is not in
+ * the cache. A link that was never offlined is logged, errno is set to
+ * ENOTSUP, and RCM_SUCCESS is returned without touching the link.
+ */
+/*ARGSUSED*/
+static int
+ibpart_undo_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t *cached;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: online(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	cached = cache_lookup(hd, rsrc, CACHE_NO_REFRESH);
+	if (cached == NULL) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp, "no such link");
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	/* Only a link that this module offlined is brought back here. */
+	if ((cached->pc_state & CACHE_NODE_OFFLINED) == 0) {
+		ibpart_log_err(cached->pc_linkid, errorp, "link not offlined");
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOTSUP;
+		return (RCM_SUCCESS);
+	}
+
+	/* Partitions first, then the IP interfaces that sit on top of them. */
+	ibpart_online_ibpart(cached);
+	ibpart_consumer_online(hd, cached, errorp, flags, info);
+
+	cached->pc_state &= ~CACHE_NODE_OFFLINED;
+	rcm_log_message(RCM_TRACE1, "IBPART: online succeeded(%s)\n", rsrc);
+	(void) mutex_unlock(&cache_lock);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * Bring up every IBPART on the node that is flagged IBPART_OFFLINED. A
+ * failure is logged as a warning and leaves the flag set; the remaining
+ * IBPARTs are still attempted.
+ */
+static void
+ibpart_online_ibpart(link_cache_t *node)
+{
+	dl_ibpart_t	*part;
+	dladm_status_t	status;
+	char		errmsg[DLADM_STRSIZE];
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next) {
+		if (part->dlib_flags & IBPART_OFFLINED) {
+			rcm_log_message(RCM_TRACE1, "IBPART: online DLID %d\n",
+			    part->dlib_ibpart_id);
+
+			status = dladm_part_up(dld_handle,
+			    part->dlib_ibpart_id, 0);
+			if (status == DLADM_STATUS_OK) {
+				part->dlib_flags &= ~IBPART_OFFLINED;
+			} else {
+				/*
+				 * Warn and carry on with the other IBPARTs.
+				 */
+				rcm_log_message(RCM_WARNING,
+				    _("IBPART: IBPART online failed (%u): %s\n"),
+				    part->dlib_ibpart_id,
+				    dladm_status2str(status, errmsg));
+			}
+		}
+	}
+}
+
+/*
+ * ibpart_offline_ibpart() - Delete the active instance of every IBPART
+ * hanging off node.
+ *
+ * Each IBPART that is deleted successfully gets `flags' or'ed into its
+ * dlib_flags.  The first failure is logged and RCM_FAILURE is returned
+ * immediately, leaving the remaining IBPARTs and node->pc_state untouched.
+ * When every deletion succeeds, `state' is or'ed into node->pc_state and
+ * RCM_SUCCESS is returned.
+ */
+static int
+ibpart_offline_ibpart(link_cache_t *node, uint32_t flags,
+    cache_node_state_t state)
+{
+	dl_ibpart_t	*part;
+	dladm_status_t	rc;
+	char		msgbuf[DLADM_STRSIZE];
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_offline_ibpart "
+	    "(%s %u %u)\n", node->pc_resource, flags, state);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		rcm_log_message(RCM_TRACE1, "IBPART: offline DLID %d\n",
+		    part->dlib_ibpart_id);
+
+		rc = dladm_part_delete(dld_handle, part->dlib_ibpart_id,
+		    DLADM_OPT_ACTIVE);
+		if (rc != DLADM_STATUS_OK) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: IBPART offline failed (%u): %s\n"),
+			    part->dlib_ibpart_id,
+			    dladm_status2str(rc, msgbuf));
+			return (RCM_FAILURE);
+		}
+
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: IBPART offline succeeded(%u)\n",
+		    part->dlib_ibpart_id);
+		part->dlib_flags |= flags;
+		part = part->dlib_next;
+	}
+
+	node->pc_state |= state;
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_get_info() - Gather usage information for this resource.
+ *
+ * The cache is refreshed and searched for rsrc.  On success *usagep receives
+ * the string built by ibpart_usage() and the RCM_CLIENT_NAME property is
+ * added to props.
+ *
+ * Returns RCM_SUCCESS, or RCM_FAILURE with errno set to ENOENT when rsrc is
+ * not in the cache, or to ENOMEM when ibpart_usage() returns NULL.
+ */
+/*ARGSUSED*/
+int
+ibpart_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **usagep, char **errorp, nvlist_t *props, rcm_info_t **info)
+{
+	link_cache_t *node;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: get_info(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(hd, rsrc, CACHE_REFRESH);
+	if (node == NULL) {
+		rcm_log_message(RCM_INFO,
+		    _("IBPART: get_info(%s) unrecognized resource\n"), rsrc);
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	*usagep = ibpart_usage(node);
+
+	/*
+	 * cache_lock is released exactly once here.  Nothing below touches
+	 * the cache, so no later path (including the failure path) may
+	 * unlock it again.
+	 */
+	(void) mutex_unlock(&cache_lock);
+	if (*usagep == NULL) {
+		/* most likely malloc failure */
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: get_info(%s) malloc failure\n"), rsrc);
+		errno = ENOMEM;
+		return (RCM_FAILURE);
+	}
+
+	/* Set client/role properties */
+	(void) nvlist_add_string(props, RCM_CLIENT_NAME, "IBPART");
+
+	rcm_log_message(RCM_TRACE1, "IBPART: get_info(%s) info = %s\n",
+	    rsrc, *usagep);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_suspend() - Suspend entry point.  No work is done for a suspend
+ * request: it is traced and RCM_SUCCESS is returned unconditionally.
+ */
+/*ARGSUSED*/
+static int
+ibpart_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval,
+    uint_t flags, char **errorp, rcm_info_t **info)
+{
+	int	rv = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: suspend(%s)\n", rsrc);
+	return (rv);
+}
+
+/*
+ * ibpart_resume() - Resume entry point.  No work is done for a resume
+ * request: it is traced and RCM_SUCCESS is returned unconditionally.
+ */
+/*ARGSUSED*/
+static int
+ibpart_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	int	rv = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: resume(%s)\n", rsrc);
+	return (rv);
+}
+
+/*
+ * ibpart_consumer_remove()
+ *
+ * Send a remove notification for each IBPART of node, one resource name
+ * ("<RCM_LINK_PREFIX>/<ibpart id>") at a time.  The walk stops at the first
+ * notification that fails; the status of the last notification attempted is
+ * returned (RCM_SUCCESS when the IBPART list is empty).
+ */
+static int
+ibpart_consumer_remove(rcm_handle_t *hd, link_cache_t *node, uint_t flags,
+    rcm_info_t **info)
+{
+	dl_ibpart_t	*part;
+	char		name[RCM_LINK_RESOURCE_MAX];
+	int		rv = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_remove (%s)\n",
+	    node->pc_resource);
+
+	part = node->pc_ibpart;
+	while (part != NULL && rv == RCM_SUCCESS) {
+		/*
+		 * Removal only follows a successful offline, so every
+		 * consumer is expected to be offlined already.
+		 */
+		assert(part->dlib_flags & IBPART_CONSUMER_OFFLINED);
+
+		(void) snprintf(name, sizeof (name), "%s/%u",
+		    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+		rv = rcm_notify_remove(hd, name, flags, info);
+		if (rv != RCM_SUCCESS) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: notify remove failed (%s)\n"), name);
+		}
+		part = part->dlib_next;
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_remove done\n");
+	return (rv);
+}
+
+/*
+ * ibpart_remove() - Drop a resource from the cache.
+ *
+ * The node is unlinked from the cache while cache_lock is held; the remove
+ * notifications to its consumers are sent after the lock has been released,
+ * and the node is freed last.  Returns the result of
+ * ibpart_consumer_remove(), or RCM_FAILURE with errno ENOENT when rsrc is
+ * not cached.
+ */
+/*ARGSUSED*/
+static int
+ibpart_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t	*entry;
+	int		status;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: remove(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	if ((entry = cache_lookup(hd, rsrc, CACHE_NO_REFRESH)) != NULL) {
+		/* unlink the cached entry; it is private to us from here on */
+		cache_remove(entry);
+		(void) mutex_unlock(&cache_lock);
+
+		status = ibpart_consumer_remove(hd, entry, flags, info);
+		node_free(entry);
+		return (status);
+	}
+
+	rcm_log_message(RCM_INFO,
+	    _("IBPART: remove(%s) unrecognized resource\n"), rsrc);
+	(void) mutex_unlock(&cache_lock);
+	errno = ENOENT;
+	return (RCM_FAILURE);
+}
+
+/*
+ * ibpart_notify_event() - Project private handler for new-resource events.
+ *
+ * Only RCM_RESOURCE_LINK_NEW is accepted; any other event fails with errno
+ * EINVAL.  The cache is updated first, then every RCM_NV_LINKID pair in nvl
+ * is processed: the IBPARTs over that link are configured and, if that
+ * worked, their consumers are notified.  A failure on one link id is
+ * recorded but does not stop the remaining ids from being processed.
+ *
+ * Returns RCM_SUCCESS only if every link id was handled without error.
+ */
+/*ARGSUSED*/
+static int
+ibpart_notify_event(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, nvlist_t *nvl, rcm_info_t **info)
+{
+	nvpair_t	*pair;
+	datalink_id_t	linkid;
+	uint64_t	raw;
+	int		rv = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: notify_event(%s)\n", rsrc);
+
+	if (strcmp(rsrc, RCM_RESOURCE_LINK_NEW) != 0) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "unrecognized event");
+		errno = EINVAL;
+		return (RCM_FAILURE);
+	}
+
+	/* Update cache to reflect latest IBPARTs */
+	if (cache_update(hd) < 0) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "private Cache update failed");
+		return (RCM_FAILURE);
+	}
+
+	/* Best effort: keep going after a failure on any single link */
+	rcm_log_message(RCM_DEBUG, "IBPART: process_nvlist\n");
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		if (strcmp(nvpair_name(pair), RCM_NV_LINKID) != 0)
+			continue;
+
+		if (nvpair_value_uint64(pair, &raw) != 0) {
+			ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+			    "cannot get linkid");
+			rv = RCM_FAILURE;
+			continue;
+		}
+
+		linkid = (datalink_id_t)raw;
+		if (ibpart_configure(hd, linkid) != 0) {
+			ibpart_log_err(linkid, errorp, "configuring failed");
+			rv = RCM_FAILURE;
+		} else if (ibpart_consumer_notify(hd, linkid, errorp, flags,
+		    info) != 0) {
+			/* Consumers are notified only after configuration */
+			ibpart_log_err(linkid, errorp,
+			    "consumer notify failed");
+			rv = RCM_FAILURE;
+		}
+	}
+
+	rcm_log_message(RCM_TRACE1,
+	    "IBPART: notify_event: link configuration complete\n");
+	return (rv);
+}
+
+/*
+ * ibpart_usage() - Build the usage string for a link.
+ *
+ * For an offlined node the result is "<link> offlined"; otherwise it is
+ * "<link> IBPART: " followed by the names of the node's IBPARTs joined by
+ * the separator.  Must be called with cache_lock held.
+ *
+ * Returns a malloc'ed buffer that the caller owns and must free, or NULL if
+ * a link name cannot be obtained or the allocation fails.
+ */
+static char *
+ibpart_usage(link_cache_t *node)
+{
+	dl_ibpart_t	*part;
+	dladm_status_t	rc;
+	const char	*fmt;
+	char		*sep;
+	char		*usage;
+	char		linkname[MAXLINKNAMELEN];
+	char		msgbuf[DLADM_STRSIZE];
+	size_t		usagesz;
+	int		offlined;
+	int		count = 0;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: usage(%s)\n", node->pc_resource);
+
+	assert(MUTEX_HELD(&cache_lock));
+	rc = dladm_datalink_id2info(dld_handle, node->pc_linkid, NULL,
+	    NULL, NULL, linkname, sizeof (linkname));
+	if (rc != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: usage(%s) get link name failure(%s)\n"),
+		    node->pc_resource, dladm_status2str(rc, msgbuf));
+		return (NULL);
+	}
+
+	offlined = ((node->pc_state & CACHE_NODE_OFFLINED) != 0);
+	fmt = offlined ? _("%1$s offlined") : _("%1$s IBPART: ");
+
+	/* TRANSLATION_NOTE: separator used between IBPART linkids */
+	sep = _(", ");
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next)
+		count++;
+
+	/* room for every IBPART name plus separator, and the leading message */
+	usagesz = count * (MAXLINKNAMELEN + strlen(sep)) +
+	    strlen(fmt) + MAXLINKNAMELEN + 1;
+	usage = malloc(usagesz);
+	if (usage == NULL) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: usage(%s) malloc failure(%s)\n"),
+		    node->pc_resource, strerror(errno));
+		return (NULL);
+	}
+	(void) snprintf(usage, usagesz, fmt, linkname);
+
+	/* An offlined node reports no IBPART names */
+	if (!offlined) {
+		for (part = node->pc_ibpart; part != NULL;
+		    part = part->dlib_next) {
+			rcm_log_message(RCM_DEBUG, "IBPART:= %u\n",
+			    part->dlib_ibpart_id);
+
+			rc = dladm_datalink_id2info(dld_handle,
+			    part->dlib_ibpart_id, NULL, NULL, NULL, linkname,
+			    sizeof (linkname));
+			if (rc != DLADM_STATUS_OK) {
+				rcm_log_message(RCM_ERROR,
+				    _("IBPART: usage(%s) get ibpart %u name "
+				    "failure(%s)\n"), node->pc_resource,
+				    part->dlib_ibpart_id,
+				    dladm_status2str(rc, msgbuf));
+				free(usage);
+				return (NULL);
+			}
+
+			(void) strlcat(usage, linkname, usagesz);
+			if (part->dlib_next != NULL)
+				(void) strlcat(usage, sep, usagesz);
+		}
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: usage (%s) info = %s\n",
+	    node->pc_resource, usage);
+
+	return (usage);
+}
+
+/*
+ * Cache management routines.  All cache management functions should be
+ * called with cache_lock held.
+ */
+
+/*
+ * cache_lookup() - Find the cache node for a resource.
+ * Call with cache_lock held.
+ *
+ * When CACHE_REFRESH is set in options the lock is dropped, the cache is
+ * updated against the system state, and the lock is taken again before the
+ * search.  Returns the node whose pc_resource equals rsrc, or NULL when
+ * there is none.
+ */
+static link_cache_t *
+cache_lookup(rcm_handle_t *hd, char *rsrc, char options)
+{
+	link_cache_t	*cur;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache lookup(%s)\n", rsrc);
+
+	assert(MUTEX_HELD(&cache_lock));
+	if (options & CACHE_REFRESH) {
+		/* cache_update() takes cache_lock itself, so release it here */
+		(void) mutex_unlock(&cache_lock);
+		(void) cache_update(hd);
+		(void) mutex_lock(&cache_lock);
+	}
+
+	cur = cache_head.pc_next;
+	while (cur != &cache_tail && strcmp(rsrc, cur->pc_resource) != 0)
+		cur = cur->pc_next;
+
+	if (cur == &cache_tail)
+		return (NULL);
+
+	rcm_log_message(RCM_TRACE2,
+	    "IBPART: cache lookup succeeded(%s)\n", rsrc);
+	return (cur);
+}
+
+/*
+ * node_free() - Release a cache node together with its resource name and
+ * its list of IBPARTs.  A NULL node is ignored.
+ */
+static void
+node_free(link_cache_t *node)
+{
+	dl_ibpart_t	*part;
+
+	if (node == NULL)
+		return;
+
+	free(node->pc_resource);
+
+	/* pop and free IBPARTs from the front until the list is empty */
+	while ((part = node->pc_ibpart) != NULL) {
+		node->pc_ibpart = part->dlib_next;
+		free(part);
+	}
+	free(node);
+}
+
+/*
+ * cache_insert() - Link a resource node into the cache, directly after
+ * cache_head.  Call with cache_lock held.
+ */
+static void
+cache_insert(link_cache_t *node)
+{
+	link_cache_t	*first;
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	/* the node that is currently first becomes the new node's successor */
+	first = cache_head.pc_next;
+
+	node->pc_prev = &cache_head;
+	node->pc_next = first;
+
+	first->pc_prev = node;
+	cache_head.pc_next = node;
+}
+
+/*
+ * cache_remove() - Unlink a resource node from the cache and clear its
+ * list pointers.  The node itself is not freed.  Call with cache_lock held.
+ */
+static void
+cache_remove(link_cache_t *node)
+{
+	link_cache_t	*before;
+	link_cache_t	*after;
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	before = node->pc_prev;
+	after = node->pc_next;
+
+	after->pc_prev = before;
+	before->pc_next = after;
+
+	node->pc_prev = NULL;
+	node->pc_next = NULL;
+}
+
+/*
+ * Argument block handed to ibpart_update() through the datalink walker:
+ * the RCM handle to use and the result of the most recent callback that
+ * got as far as touching the cache.
+ */
+typedef struct ibpart_update_arg_s {
+	rcm_handle_t	*hd;
+	int		retval;
+} ibpart_update_arg_t;
+
+/*
+ * ibpart_update() - Walker callback that records one IBPART in the cache.
+ *
+ * The IBPART's active attributes give the link it sits on; the cache node
+ * for that link is looked up, or created and flagged CACHE_NODE_NEW.  The
+ * IBPART is then either found on the node (its IBPART_STALE flag is
+ * cleared) or added at the head of the node's list.  The node's
+ * CACHE_NODE_STALE flag is cleared as well.
+ *
+ * Links whose attributes cannot be read, or whose dia_physlinkid is
+ * DATALINK_INVALID_LINKID, are skipped without touching arg->retval.  On
+ * an allocation failure arg->retval is set to -1 and the walk is
+ * terminated; otherwise arg->retval is set to 0 and the walk continues.
+ * Call with cache_lock held.
+ */
+static int
+ibpart_update(dladm_handle_t handle, datalink_id_t ibpartid, void *arg)
+{
+	ibpart_update_arg_t	*uargp = arg;
+	rcm_handle_t		*hd = uargp->hd;
+	link_cache_t		*node;
+	dl_ibpart_t		*part;
+	char			*rsrc;
+	dladm_ib_attr_t		attr;
+	dladm_status_t		rc;
+	char			msgbuf[DLADM_STRSIZE];
+	boolean_t		created = B_FALSE;
+	int			ret = -1;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_update(%u)\n", ibpartid);
+
+	assert(MUTEX_HELD(&cache_lock));
+	rc = dladm_part_info(handle, ibpartid, &attr, DLADM_OPT_ACTIVE);
+	if (rc != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_update() cannot get ibpart information for "
+		    "%u(%s)\n", ibpartid, dladm_status2str(rc, msgbuf));
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	if (attr.dia_physlinkid == DATALINK_INVALID_LINKID) {
+		/* Skip the IB port nodes. */
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_update(): skip the PORT nodes %u\n",
+		    ibpartid);
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	if ((rsrc = malloc(RCM_LINK_RESOURCE_MAX)) == NULL) {
+		rcm_log_message(RCM_ERROR, _("IBPART: malloc error(%s): %u\n"),
+		    strerror(errno), ibpartid);
+		goto done;
+	}
+
+	(void) snprintf(rsrc, RCM_LINK_RESOURCE_MAX, "%s/%u",
+	    RCM_LINK_PREFIX, attr.dia_physlinkid);
+
+	node = cache_lookup(hd, rsrc, CACHE_NO_REFRESH);
+	if (node == NULL) {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: %s is a new resource (ibpartid:%d)\n",
+		    rsrc, attr.dia_partlinkid);
+		node = calloc(1, sizeof (link_cache_t));
+		if (node == NULL) {
+			free(rsrc);
+			rcm_log_message(RCM_ERROR, _("IBPART: calloc: %s\n"),
+			    strerror(errno));
+			goto done;
+		}
+
+		/* the new node takes ownership of rsrc */
+		node->pc_resource = rsrc;
+		node->pc_ibpart = NULL;
+		node->pc_linkid = attr.dia_physlinkid;
+		node->pc_state |= CACHE_NODE_NEW;
+		created = B_TRUE;
+	} else {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: %s already registered (ibpartid:%d)\n",
+		    rsrc, attr.dia_partlinkid);
+		free(rsrc);
+	}
+
+	/* look for this IBPART on the node's list */
+	part = node->pc_ibpart;
+	while (part != NULL && part->dlib_ibpart_id != ibpartid)
+		part = part->dlib_next;
+
+	if (part != NULL) {
+		part->dlib_flags &= ~IBPART_STALE;
+	} else {
+		part = calloc(1, sizeof (dl_ibpart_t));
+		if (part == NULL) {
+			rcm_log_message(RCM_ERROR, _("IBPART: malloc: %s\n"),
+			    strerror(errno));
+			/* a node created above is not in the cache yet */
+			if (created) {
+				free(rsrc);
+				free(node);
+			}
+			goto done;
+		}
+
+		/* push the new IBPART onto the front of the list */
+		part->dlib_ibpart_id = ibpartid;
+		part->dlib_next = node->pc_ibpart;
+		part->dlib_prev = NULL;
+		if (node->pc_ibpart != NULL)
+			node->pc_ibpart->dlib_prev = part;
+		node->pc_ibpart = part;
+	}
+
+	node->pc_state &= ~CACHE_NODE_STALE;
+
+	if (created)
+		cache_insert(node);
+
+	rcm_log_message(RCM_TRACE3, "IBPART: ibpart_update: succeeded(%u)\n",
+	    ibpartid);
+	ret = 0;
+done:
+	uargp->retval = ret;
+	return (ret == 0 ? DLADM_WALK_CONTINUE : DLADM_WALK_TERMINATE);
+}
+
+/*
+ * ibpart_update_all() - Run ibpart_update() over every active datalink of
+ * class DATALINK_CLASS_PART.  Returns the retval left behind by the walker
+ * callback (0 if no callback reported a failure).  Call with cache_lock
+ * held.
+ */
+static int
+ibpart_update_all(rcm_handle_t *hd)
+{
+	ibpart_update_arg_t	walkarg;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_update_all\n");
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	walkarg.hd = hd;
+	walkarg.retval = 0;
+	(void) dladm_walk_datalink_id(ibpart_update, dld_handle, &walkarg,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE);
+	return (walkarg.retval);
+}
+
+/*
+ * cache_update() - Bring the cache in line with the current set of IBPARTs.
+ *
+ * Takes cache_lock itself.  Works in three steps: mark every node and every
+ * IBPART stale, let ibpart_update_all() clear the marks on whatever still
+ * exists (adding new entries as needed), then sweep the cache.  The sweep
+ * frees IBPARTs that are still stale, unregisters and frees nodes that are
+ * still stale, and registers interest in nodes flagged CACHE_NODE_NEW.
+ *
+ * Returns the result of ibpart_update_all(), or -1 if registering a new
+ * node failed.
+ */
+static int
+cache_update(rcm_handle_t *hd)
+{
+	link_cache_t	*node, *nextnode;
+	dl_ibpart_t	*part, *nextpart;
+	int		rv;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache_update\n");
+
+	(void) mutex_lock(&cache_lock);
+
+	/* step 1: assume everything is gone until proven otherwise */
+	for (node = cache_head.pc_next; node != &cache_tail;
+	    node = node->pc_next) {
+		node->pc_state |= CACHE_NODE_STALE;
+		for (part = node->pc_ibpart; part != NULL;
+		    part = part->dlib_next)
+			part->dlib_flags |= IBPART_STALE;
+	}
+
+	/* step 2: refresh from the system */
+	rv = ibpart_update_all(hd);
+
+	/*
+	 * step 3: the sweep runs even when ibpart_update_all() failed, so
+	 * stale entries never linger in the cache.
+	 */
+	node = cache_head.pc_next;
+	while (node != &cache_tail) {
+		/* drop the IBPARTs that were not seen again */
+		part = node->pc_ibpart;
+		while (part != NULL) {
+			nextpart = part->dlib_next;
+
+			if (part->dlib_flags & IBPART_STALE) {
+				if (part->dlib_prev == NULL)
+					node->pc_ibpart = nextpart;
+				else
+					part->dlib_prev->dlib_next = nextpart;
+
+				if (nextpart != NULL)
+					nextpart->dlib_prev = part->dlib_prev;
+				free(part);
+			}
+			part = nextpart;
+		}
+
+		/* remember the successor: node may be freed below */
+		nextnode = node->pc_next;
+
+		if (node->pc_state & CACHE_NODE_STALE) {
+			(void) rcm_unregister_interest(hd, node->pc_resource,
+			    0);
+			rcm_log_message(RCM_DEBUG, "IBPART: unregistered %s\n",
+			    node->pc_resource);
+			assert(node->pc_ibpart == NULL);
+			cache_remove(node);
+			node_free(node);
+		} else if (node->pc_state & CACHE_NODE_NEW) {
+			if (rcm_register_interest(hd, node->pc_resource, 0,
+			    NULL) == RCM_SUCCESS) {
+				rcm_log_message(RCM_DEBUG,
+				    "IBPART: registered %s\n",
+				    node->pc_resource);
+				node->pc_state &= ~CACHE_NODE_NEW;
+			} else {
+				rcm_log_message(RCM_ERROR,
+				    _("IBPART: failed to register %s\n"),
+				    node->pc_resource);
+				rv = -1;
+			}
+		}
+
+		node = nextnode;
+	}
+
+	(void) mutex_unlock(&cache_lock);
+	return (rv);
+}
+
+/*
+ * cache_free() - Empty the cache: every node is unlinked and freed.  Takes
+ * cache_lock for the duration.
+ */
+static void
+cache_free()
+{
+	link_cache_t	*victim;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache_free\n");
+
+	(void) mutex_lock(&cache_lock);
+	/* keep removing the first node until only the sentinels are left */
+	while ((victim = cache_head.pc_next) != &cache_tail) {
+		cache_remove(victim);
+		node_free(victim);
+	}
+	(void) mutex_unlock(&cache_lock);
+}
+
+/*
+ * ibpart_log_err() - RCM error log wrapper
+ *
+ * Logs errmsg through rcm_log_message() and, when errorp is not NULL,
+ * stores a newly malloc'ed copy of the message in *errorp.  The copy has
+ * the form "IBPART: <errmsg>(<link name>)" when linkid is valid and its
+ * name can be obtained, and "IBPART: <errmsg>" otherwise.  *errorp is set
+ * to NULL if the allocation fails.
+ *
+ * When errorp is NULL no copy is built at all; allocating one would leak
+ * it, since nobody could ever receive or free it.
+ */
+static void
+ibpart_log_err(datalink_id_t linkid, char **errorp, char *errmsg)
+{
+	char		link[MAXLINKNAMELEN];
+	char		errstr[DLADM_STRSIZE];
+	dladm_status_t	status;
+	size_t		len;
+	const char	*errfmt;
+	char		*error;
+
+	link[0] = '\0';
+	if (linkid != DATALINK_INVALID_LINKID) {
+		char rsrc[RCM_LINK_RESOURCE_MAX];
+
+		(void) snprintf(rsrc, sizeof (rsrc), "%s/%u",
+		    RCM_LINK_PREFIX, linkid);
+
+		rcm_log_message(RCM_ERROR, _("IBPART: %s(%s)\n"), errmsg, rsrc);
+		if ((status = dladm_datalink_id2info(dld_handle, linkid, NULL,
+		    NULL, NULL, link, sizeof (link))) != DLADM_STATUS_OK) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: cannot get link name for (%s) %s\n"),
+			    rsrc, dladm_status2str(status, errstr));
+		}
+	} else {
+		rcm_log_message(RCM_ERROR, _("IBPART: %s\n"), errmsg);
+	}
+
+	/* No one to hand the message to: logging above was all there is to do */
+	if (errorp == NULL)
+		return;
+
+	errfmt = strlen(link) > 0 ? _("IBPART: %s(%s)") : _("IBPART: %s");
+	len = strlen(errfmt) + strlen(errmsg) + MAXLINKNAMELEN + 1;
+	if ((error = malloc(len)) != NULL) {
+		if (strlen(link) > 0)
+			(void) snprintf(error, len, errfmt, errmsg, link);
+		else
+			(void) snprintf(error, len, errfmt, errmsg);
+	}
+
+	*errorp = error;
+}
+
+/*
+ * ibpart_consumer_online()
+ *
+ * Send an online notification for every IBPART of node whose consumers are
+ * flagged IBPART_CONSUMER_OFFLINED.  The flag is cleared only when the
+ * notification succeeds; failures are not reported to the caller.
+ */
+/* ARGSUSED */
+static void
+ibpart_consumer_online(rcm_handle_t *hd, link_cache_t *node, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	dl_ibpart_t	*part;
+	char		name[RCM_LINK_RESOURCE_MAX];
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_online (%s)\n",
+	    node->pc_resource);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		if (part->dlib_flags & IBPART_CONSUMER_OFFLINED) {
+			(void) snprintf(name, sizeof (name), "%s/%u",
+			    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+			if (rcm_notify_online(hd, name, flags, info) ==
+			    RCM_SUCCESS) {
+				part->dlib_flags &= ~IBPART_CONSUMER_OFFLINED;
+			}
+		}
+		part = part->dlib_next;
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_online done\n");
+}
+
+/*
+ * ibpart_consumer_offline()
+ *
+ * Request offline of the consumers of every IBPART of node, flagging each
+ * IBPART IBPART_CONSUMER_OFFLINED as its request succeeds.  If a request
+ * fails the walk stops and ibpart_consumer_online() is run to bring back
+ * the consumers that were already offlined.
+ *
+ * Returns the status of the last offline request made (RCM_SUCCESS when
+ * the IBPART list is empty).
+ */
+static int
+ibpart_consumer_offline(rcm_handle_t *hd, link_cache_t *node, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	dl_ibpart_t	*part;
+	char		name[RCM_LINK_RESOURCE_MAX];
+	int		rv = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_offline (%s)\n",
+	    node->pc_resource);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		(void) snprintf(name, sizeof (name), "%s/%u",
+		    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+		rv = rcm_request_offline(hd, name, flags, info);
+		if (rv != RCM_SUCCESS)
+			break;
+
+		part->dlib_flags |= IBPART_CONSUMER_OFFLINED;
+		part = part->dlib_next;
+	}
+
+	/* a failed request leaves rv != RCM_SUCCESS: undo the partial offline */
+	if (rv != RCM_SUCCESS)
+		ibpart_consumer_online(hd, node, errorp, flags, info);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_offline done\n");
+	return (rv);
+}
+
+/*
+ * ibpart_notify_new_ibpart() - Send an RCM_RESOURCE_LINK_NEW event to other
+ * modules about the IBPARTs that sit on the link named by rsrc.
+ *
+ * The cache is refreshed and searched for rsrc; the ids of all IBPARTs on
+ * the node found are collected into an nvlist under RCM_NV_LINKID while
+ * cache_lock is held.  The event itself is sent after the lock has been
+ * released, so the cache node must not be touched from that point on.
+ *
+ * Returns 0 on success, and also when rsrc is not in the cache (nothing to
+ * announce); -1 if the nvlist cannot be built or the event cannot be sent.
+ */
+static int
+ibpart_notify_new_ibpart(rcm_handle_t *hd, char *rsrc)
+{
+	link_cache_t *node;
+	dl_ibpart_t *ibpart;
+	nvlist_t *nvl = NULL;
+	uint64_t id;
+	int ret = -1;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_notify_new_ibpart (%s)\n",
+	    rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	if ((node = cache_lookup(hd, rsrc, CACHE_REFRESH)) == NULL) {
+		(void) mutex_unlock(&cache_lock);
+		return (0);
+	}
+
+	if (nvlist_alloc(&nvl, 0, 0) != 0) {
+		(void) mutex_unlock(&cache_lock);
+		rcm_log_message(RCM_WARNING,
+		    _("IBPART: failed to allocate nvlist\n"));
+		goto done;
+	}
+
+	for (ibpart = node->pc_ibpart; ibpart != NULL;
+	    ibpart = ibpart->dlib_next) {
+		rcm_log_message(RCM_TRACE2, "IBPART: ibpart_notify_new_ibpart "
+		    "add (%u)\n", ibpart->dlib_ibpart_id);
+
+		id = ibpart->dlib_ibpart_id;
+		if (nvlist_add_uint64(nvl, RCM_NV_LINKID, id) != 0) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: failed to construct nvlist\n"));
+			(void) mutex_unlock(&cache_lock);
+			goto done;
+		}
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	if (rcm_notify_event(hd, RCM_RESOURCE_LINK_NEW, 0, nvl, NULL) !=
+	    RCM_SUCCESS) {
+		/*
+		 * cache_lock is no longer held, so node may already have
+		 * been freed by a cache update.  rsrc is the very string the
+		 * lookup matched against node->pc_resource, so log that.
+		 */
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to notify %s event for %s\n"),
+		    RCM_RESOURCE_LINK_NEW, rsrc);
+		goto done;
+	}
+
+	ret = 0;
+done:
+	if (nvl != NULL)
+		nvlist_free(nvl);
+	return (ret);
+}
+
+/*
+ * ibpart_consumer_notify() - Announce the IBPARTs over linkid to their
+ * consumers.
+ *
+ * If the announcement fails and the link is present in the cache, its
+ * IBPARTs are taken offline again and flagged IBPART_STALE, with the node
+ * flagged CACHE_NODE_STALE.  Returns 0 on success, -1 on failure.
+ */
+static int
+ibpart_consumer_notify(rcm_handle_t *hd, datalink_id_t linkid, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	char		name[RCM_LINK_RESOURCE_MAX];
+	link_cache_t	*entry;
+
+	/* Resource name under which the link is cached */
+	(void) snprintf(name, sizeof (name), "%s/%u", RCM_LINK_PREFIX,
+	    linkid);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_notify(%s)\n",
+	    name);
+
+	/* Inform IP consumers of the new link. */
+	if (ibpart_notify_new_ibpart(hd, name) == 0) {
+		rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_notify "
+		    "succeeded\n");
+		return (0);
+	}
+
+	(void) mutex_lock(&cache_lock);
+	entry = cache_lookup(hd, name, CACHE_NO_REFRESH);
+	if (entry != NULL) {
+		(void) ibpart_offline_ibpart(entry, IBPART_STALE,
+		    CACHE_NODE_STALE);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	rcm_log_message(RCM_TRACE2,
+	    "IBPART: ibpart_notify_new_ibpart failed(%s)\n", name);
+	return (-1);
+}
+
+/*
+ * Argument block handed to ibpart_up() through the datalink walker: the
+ * link whose IBPARTs are to be brought up, and a result that is set to -1
+ * as soon as one of them fails to come up.
+ */
+typedef struct ibpart_up_arg_s {
+	datalink_id_t	linkid;
+	int		retval;
+} ibpart_up_arg_t;
+
+/*
+ * ibpart_up() - Walker callback that brings up one IBPART if, according to
+ * its persistent attributes, it sits on the link given in arg.
+ *
+ * The walk is never terminated from here: IBPARTs whose attributes cannot
+ * be read, or that belong to another link, are skipped, and a failure to
+ * bring one up is logged and recorded in arg->retval.
+ */
+static int
+ibpart_up(dladm_handle_t handle, datalink_id_t ibpartid, void *arg)
+{
+	ibpart_up_arg_t	*upargp = arg;
+	dladm_ib_attr_t	attr;
+	dladm_status_t	rc;
+	char		msgbuf[DLADM_STRSIZE];
+
+	rc = dladm_part_info(handle, ibpartid, &attr, DLADM_OPT_PERSIST);
+	if (rc != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_up(): cannot get information for IBPART %u "
+		    "(%s)\n", ibpartid, dladm_status2str(rc, msgbuf));
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	if (attr.dia_physlinkid == upargp->linkid) {
+		rcm_log_message(RCM_TRACE3, "IBPART: ibpart_up(%u)\n",
+		    ibpartid);
+
+		rc = dladm_part_up(handle, ibpartid, 0);
+		if (rc != DLADM_STATUS_OK) {
+			/* Warn, remember the failure, and move on */
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: IBPART up failed (%u): %s\n"),
+			    ibpartid, dladm_status2str(rc, msgbuf));
+
+			upargp->retval = -1;
+		}
+	}
+
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * ibpart_configure() - Bring up the IBPARTs configured over a link.
+ *
+ * Nothing is done (and 0 is returned) when the link is already in the
+ * refreshed cache and is not marked CACHE_NODE_OFFLINED.  Otherwise all
+ * persistent datalinks of class DATALINK_CLASS_PART are walked with
+ * ibpart_up().  Returns 0 on success, -1 if any IBPART failed to come up.
+ */
+static int
+ibpart_configure(rcm_handle_t *hd, datalink_id_t linkid)
+{
+	char		name[RCM_LINK_RESOURCE_MAX];
+	link_cache_t	*entry;
+	ibpart_up_arg_t	walkarg;
+
+	/* Resource name under which the link would be cached */
+	(void) snprintf(name, sizeof (name), "%s/%u", RCM_LINK_PREFIX, linkid);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_configure(%s)\n", name);
+
+	/* Only a link that is new or was previously offlined needs work */
+	(void) mutex_lock(&cache_lock);
+	entry = cache_lookup(hd, name, CACHE_REFRESH);
+	if (entry != NULL && (entry->pc_state & CACHE_NODE_OFFLINED) == 0) {
+		rcm_log_message(RCM_TRACE2,
+		    "IBPART: Skipping configured interface(%s)\n", name);
+		(void) mutex_unlock(&cache_lock);
+		return (0);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	walkarg.linkid = linkid;
+	walkarg.retval = 0;
+	(void) dladm_walk_datalink_id(ibpart_up, dld_handle, &walkarg,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST);
+
+	if (walkarg.retval == 0) {
+		rcm_log_message(RCM_TRACE2,
+		    "IBPART: ibpart_configure succeeded(%s)\n", name);
+	}
+	return (walkarg.retval);
+}
--- a/usr/src/cmd/svc/milestone/net-nwam Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/svc/milestone/net-nwam Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
#
. /lib/svc/share/smf_include.sh
@@ -525,6 +524,19 @@
. "${upgrade_script}"
fi
+ #
+ # Upgrade handling for ibd:
+ # After we are done with the upgrade handling, we can not set the
+ # ibd/ibd_upgraded property to "true" as the file system is
+ # read-only at this point. It will be done later by ibd-post-upgrade
+ # service.
+ #
+ ibd_upgraded=`/bin/svcprop -c -p ibd/ibd_upgraded \
+ svc:/network/physical:default 2> /dev/null`
+ if [ "$ibd_upgraded" != "true" ]; then
+ /sbin/ibd_upgrade -v
+ fi
+
# Bring up simnet instances
/sbin/dladm up-simnet
# Initialize security objects.
@@ -536,6 +548,7 @@
#
/sbin/dladm up-vnic
/sbin/dladm up-vlan
+ /sbin/dladm up-part
/sbin/dladm up-aggr
/sbin/flowadm init-flow
fi
--- a/usr/src/cmd/svc/milestone/net-physical Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/svc/milestone/net-physical Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
#
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
# All rights reserved.
@@ -62,6 +61,19 @@
fi
#
+ # Upgrade handling for ibd:
+ # After we are done with the upgrade handling, we can not set the
+ # ibd/ibd_upgraded property to "true" as the file system is
+ # read-only at this point. It will be done later by ibd-post-upgrade
+ # service.
+ #
+ ibd_upgraded=`/bin/svcprop -c -p ibd/ibd_upgraded \
+ $SMF_FMRI 2> /dev/null`
+ if [ "$ibd_upgraded" != "true" ]; then
+ /sbin/ibd_upgrade -v
+ fi
+
+ #
# Bring up simnets, link aggregations and initialize security objects.
# Note that link property initialization is deferred until after
# IP interfaces are plumbed to ensure that the links will not
@@ -72,6 +84,7 @@
/sbin/dladm up-simnet
/sbin/dladm up-aggr
/sbin/dladm up-vlan
+ /sbin/dladm up-part
/sbin/dladm init-secobj
#
# Bring up VNICs
--- a/usr/src/lib/libdladm/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
#
#
@@ -29,7 +28,8 @@
HDRS = libdladm.h libdladm_impl.h libdllink.h libdlaggr.h \
libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h \
libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h \
- libdlether.h libdlsim.h libdlbridge.h libdliptun.h
+ libdlether.h libdlsim.h libdlbridge.h libdliptun.h \
+ libdlib.h
HDRDIR = common
@@ -45,7 +45,8 @@
common/propfuncs.c common/libdlflow.c \
common/libdlstat.c common/flowattr.c \
common/libdlether.c common/libdlsim.c \
- common/libdlbridge.c common/libdliptun.c
+ common/libdlbridge.c common/libdliptun.c\
+ common/libdlib.c
XGETFLAGS = -a -x libdladm.xcl
--- a/usr/src/lib/libdladm/Makefile.com Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/Makefile.com Wed Apr 14 10:26:18 2010 -0700
@@ -19,14 +19,13 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
#
LIBRARY = libdladm.a
VERS = .1
OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \
- libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o \
+ libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o libdlib.o\
flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \
usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o
--- a/usr/src/lib/libdladm/common/libdladm.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <unistd.h>
@@ -385,6 +384,27 @@
case DLADM_STATUS_POOLCPU:
s = "pool and cpus property are mutually exclusive";
break;
+ case DLADM_STATUS_INVALID_PORT_INSTANCE:
+ s = "invalid IB phys link";
+ break;
+ case DLADM_STATUS_PORT_IS_DOWN:
+ s = "port is down";
+ break;
+ case DLADM_STATUS_PARTITION_EXISTS:
+ s = "partition already exists";
+ break;
+ case DLADM_STATUS_PKEY_NOT_PRESENT:
+ s = "PKEY is not present on the port";
+ break;
+ case DLADM_STATUS_INVALID_PKEY:
+ s = "invalid PKEY";
+ break;
+ case DLADM_STATUS_NO_IB_HW_RESOURCE:
+ s = "IB internal resource not available";
+ break;
+ case DLADM_STATUS_INVALID_PKEY_TBL_SIZE:
+ s = "invalid PKEY table size";
+ break;
default:
s = "<unknown error>";
break;
@@ -618,6 +638,9 @@
case DATALINK_CLASS_BRIDGE:
s = "bridge";
break;
+ case DATALINK_CLASS_PART:
+ s = "part";
+ break;
default:
s = "unknown";
break;
--- a/usr/src/lib/libdladm/common/libdladm.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm.h Wed Apr 14 10:26:18 2010 -0700
@@ -161,7 +161,14 @@
DLADM_STATUS_NO_HWRINGS,
DLADM_STATUS_PERMONLY,
DLADM_STATUS_OPTMISSING,
- DLADM_STATUS_POOLCPU
+ DLADM_STATUS_POOLCPU,
+ DLADM_STATUS_INVALID_PORT_INSTANCE,
+ DLADM_STATUS_PORT_IS_DOWN,
+ DLADM_STATUS_PKEY_NOT_PRESENT,
+ DLADM_STATUS_PARTITION_EXISTS,
+ DLADM_STATUS_INVALID_PKEY,
+ DLADM_STATUS_NO_IB_HW_RESOURCE,
+ DLADM_STATUS_INVALID_PKEY_TBL_SIZE
} dladm_status_t;
typedef enum {
--- a/usr/src/lib/libdladm/common/libdladm_impl.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm_impl.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LIBDLADM_IMPL_H
@@ -103,6 +102,11 @@
#define FSIMNETPEER "simnetpeer" /* uint64_t */
/*
+ * Set for IB partitions only
+ */
+#define FPORTPKEY "pkey" /* uint64_t */
+
+/*
* Common fields
*/
#define FMACADDR "macaddr" /* string */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libdladm/common/libdlib.c Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,732 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <libdevinfo.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <strings.h>
+#include <libintl.h>
+#include <net/if_types.h>
+#include <net/if_dl.h>
+#include <sys/dld.h>
+#include <sys/ib/ib_types.h>
+#include <sys/ibpart.h>
+#include <libdllink.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdladm_impl.h>
+
+/*
+ * IP over IB administration API; see PSARC/2010/085
+ */
+
+/*
+ * Function prototypes
+ */
+dladm_status_t dladm_part_create(dladm_handle_t, datalink_id_t, ib_pkey_t,
+ uint32_t, char *, datalink_id_t *, dladm_arg_list_t *);
+static int dladm_ibd_get_instance(char *);
+static dladm_status_t i_dladm_part_create(dladm_handle_t,
+ dladm_part_attr_t *);
+dladm_status_t dladm_part_persist_conf(dladm_handle_t, dladm_part_attr_t *);
+static dladm_status_t i_dladm_part_delete(dladm_handle_t, datalink_id_t);
+dladm_status_t dladm_part_delete(dladm_handle_t, datalink_id_t, int);
+static int i_dladm_part_up(dladm_handle_t, datalink_id_t, void *);
+dladm_status_t dladm_part_up(dladm_handle_t, datalink_id_t, uint32_t);
+
+/*
+ * Translate a status code reported by the IP over IB kernel driver into the
+ * matching dladm status; unrecognized codes become DLADM_STATUS_FAILED.
+ */
+static dladm_status_t
+dladm_ib_ioctl_err2status(int err)
+{
+	static const struct {
+		int		ibd_err;
+		dladm_status_t	dl_status;
+	} errmap[] = {
+		{ 0, DLADM_STATUS_OK },
+		{ IBD_INVALID_PORT_INST, DLADM_STATUS_INVALID_PORT_INSTANCE },
+		{ IBD_PORT_IS_DOWN, DLADM_STATUS_PORT_IS_DOWN },
+		{ IBD_PKEY_NOT_PRESENT, DLADM_STATUS_PKEY_NOT_PRESENT },
+		{ IBD_PARTITION_EXISTS, DLADM_STATUS_PARTITION_EXISTS },
+		{ IBD_INVALID_PKEY, DLADM_STATUS_INVALID_PKEY },
+		{ IBD_NO_HW_RESOURCE, DLADM_STATUS_NO_IB_HW_RESOURCE },
+		{ IBD_INVALID_PKEY_TBL_SIZE,
+		    DLADM_STATUS_INVALID_PKEY_TBL_SIZE }
+	};
+	size_t	idx;
+	for (idx = 0; idx < sizeof (errmap) / sizeof (errmap[0]); idx++) {
+		if (errmap[idx].ibd_err == err)
+			return (errmap[idx].dl_status);
+	}
+	return (DLADM_STATUS_FAILED);
+}
+
+/* Run an IPoIB ioctl; on failure prefer the driver's ioc_status to errno. */
+static dladm_status_t
+i_dladm_ib_ioctl(dladm_handle_t handle, int ioccmd, ibd_ioctl_t *iocp)
+{
+	if (ioctl(dladm_dld_fd(handle), ioccmd, iocp) != 0) {
+		return ((iocp->ioc_status != 0) ?
+		    dladm_ib_ioctl_err2status(iocp->ioc_status) :
+		    dladm_errno2status(errno));
+	}
+	return (DLADM_STATUS_OK);
+}
+
+/*
+ * Query the IP over IB driver for the active (running) configuration of the
+ * IB partition 'linkid' and store it in 'attrp'. 'attrp' is zeroed first, so
+ * on failure it holds no partial data.
+ */
+static dladm_status_t
+i_dladm_part_info_active(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp)
+{
+	ibpart_ioctl_t	req;
+	dladm_status_t	rc;
+
+	bzero(attrp, sizeof (*attrp));
+	bzero(&req, sizeof (req));
+
+	/* On input ioc_linkid names the IB partition being queried. */
+	req.ibdioc.ioc_info_cmd = IBD_INFO_CMD_IBPART;
+	req.ibdioc.ioc_linkid = linkid;
+
+	rc = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&req);
+	if (rc != DLADM_STATUS_OK)
+		return (rc);
+
+	/*
+	 * On return from the ioctl the ioc_linkid field holds the datalink ID
+	 * of the IB port (Phys link) rather than that of the partition.
+	 */
+	attrp->dia_physlinkid = req.ibdioc.ioc_linkid;
+	attrp->dia_partlinkid = req.ioc_partid;
+	attrp->dia_pkey = req.ioc_pkey;
+
+	/* HCA and port identification reported by the driver. */
+	attrp->dia_hca_guid = req.ibdioc.ioc_hcaguid;
+	attrp->dia_port_guid = req.ibdioc.ioc_portguid;
+	attrp->dia_portnum = req.ibdioc.ioc_portnum;
+	attrp->dia_instance = req.ibdioc.ioc_port_inst;
+
+	/*
+	 * Propagate the driver's "created with force" indication to the
+	 * caller through the attribute flags.
+	 */
+	if (req.ioc_force_create != 0)
+		attrp->dia_flags |= DLADM_IBPART_FORCE_CREATE;
+
+	return (DLADM_STATUS_OK);
+}
+
+/*
+ * Get the configuration information about the IB partition 'linkid' from the
+ * persistent configuration; DLADM_STATUS_BADARG if it is not a partition.
+ */
+static dladm_status_t
+i_dladm_part_info_persist(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp)
+{
+	dladm_conf_t		conf;
+	dladm_status_t		status;
+	char			linkover[MAXLINKNAMELEN];
+	datalink_class_t	class;
+	boolean_t		force = B_FALSE;
+	uint64_t		u64;
+
+	/*
+	 * Validate the link before any conf handle exists, so these failures
+	 * return directly instead of destroying an uninitialized 'conf'.
+	 */
+	if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class,
+	    NULL, NULL, 0)) != DLADM_STATUS_OK)
+		return (status);
+	if (class != DATALINK_CLASS_PART)
+		return (DLADM_STATUS_BADARG);
+
+	bzero(attrp, sizeof (*attrp));
+	attrp->dia_partlinkid = linkid;
+	if ((status = dladm_read_conf(handle, linkid, &conf)) !=
+	    DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Get the name of the IB Phys link over which IB partition was
+	 * created, then resolve it to the IB Phys link's datalink ID.
+	 */
+	status = dladm_get_conf_field(handle, conf, FLINKOVER, linkover,
+	    sizeof (linkover));
+	if (status != DLADM_STATUS_OK) {
+		attrp->dia_physlinkid = DATALINK_INVALID_LINKID;
+		goto done;
+	}
+	if ((status = dladm_name2info(handle, linkover,
+	    &attrp->dia_physlinkid, NULL, NULL, NULL)) != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * The P_Key is stored as a uint64_t: read it at that width and narrow
+	 * it, as a direct read into dia_pkey would overrun that field.
+	 */
+	status = dladm_get_conf_field(handle, conf, FPORTPKEY, &u64,
+	    sizeof (u64));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+	attrp->dia_pkey = (ib_pkey_t)u64;
+
+	/*
+	 * If the FFORCE field is set in the persistent configuration database
+	 * set the force create flag in the partition attributes. A missing
+	 * FFORCE field is not an error.
+	 */
+	status = dladm_get_conf_field(handle, conf, FFORCE, &force,
+	    sizeof (boolean_t));
+	if (status == DLADM_STATUS_OK) {
+		if (force == B_TRUE)
+			attrp->dia_flags |= DLADM_IBPART_FORCE_CREATE;
+	} else if (status == DLADM_STATUS_NOTFOUND) {
+		status = DLADM_STATUS_OK;
+	}
+done:
+	dladm_destroy_conf(handle, conf);
+	return (status);
+}
+
+/*
+ * Fetch the attributes of the IB partition 'linkid' into 'attrp'. 'flags'
+ * selects the source and must be exactly DLADM_OPT_ACTIVE (the running
+ * system) or DLADM_OPT_PERSIST (the persistent configuration database); any
+ * other value is rejected with DLADM_STATUS_BADARG.
+ */
+dladm_status_t
+dladm_part_info(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp, uint32_t flags)
+{
+	if (flags != DLADM_OPT_ACTIVE && flags != DLADM_OPT_PERSIST)
+		return (DLADM_STATUS_BADARG);
+
+	return ((flags == DLADM_OPT_ACTIVE) ?
+	    i_dladm_part_info_active(handle, linkid, attrp) :
+	    i_dladm_part_info_persist(handle, linkid, attrp));
+}
+
+/*
+ * Get the configuration information for the IB Phys link given by the datalink
+ * ID 'linkid'. On success 'attrp' owns a P_Key table that the caller releases
+ * with dladm_free_ib_info(); on failure 'attrp' is left zeroed.
+ */
+/* ARGSUSED */
+dladm_status_t
+dladm_ib_info(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_ib_attr_t *attrp, uint32_t flags)
+{
+	int			instance;
+	ibport_ioctl_t		ioc;
+	dladm_phys_attr_t	dpa;
+	dladm_status_t		status = DLADM_STATUS_OK;
+
+	/* Never hand back uninitialized fields such as dia_port_pkeys. */
+	bzero(attrp, sizeof (*attrp));
+
+	/*
+	 * We need to get the device name of the IB Phys link to get the
+	 * correct instance number of the IP over IB driver instance.
+	 */
+	if (dladm_phys_info(handle, linkid, &dpa, DLADM_OPT_ACTIVE)
+	    != DLADM_STATUS_OK)
+		return (DLADM_STATUS_BADARG);
+
+	instance = dladm_ibd_get_instance(dpa.dp_dev);
+	if (instance == -1)
+		return (DLADM_STATUS_FAILED);
+
+	bzero(&ioc, sizeof (ioc));
+	/*
+	 * The ioc_linkid here contains the IB port linkid. We make the
+	 * first ioctl call to get the P_Key table size for this HCA port.
+	 */
+	ioc.ibdioc.ioc_linkid = linkid;
+	ioc.ibdioc.ioc_info_cmd = IBD_INFO_CMD_PKEYTBLSZ;
+	ioc.ioc_pkey_tbl_sz = 0;
+	ioc.ibdioc.ioc_port_inst = instance;
+
+	status = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&ioc);
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Now allocate the memory for the P_Key table based on the table size
+	 * returned by the ioctl (element count first, then element size).
+	 */
+	ioc.ioc_pkeys = calloc(ioc.ioc_pkey_tbl_sz, sizeof (ib_pkey_t));
+	if (ioc.ioc_pkeys == NULL) {
+		status = dladm_errno2status(errno);
+		goto bail;
+	}
+
+	/*
+	 * Call the ioctl again to get the P_Key table and other IB Phys link
+	 * attributes.
+	 */
+	ioc.ibdioc.ioc_linkid = linkid;
+	ioc.ibdioc.ioc_port_inst = instance;
+	ioc.ibdioc.ioc_info_cmd = IBD_INFO_CMD_IBPORT;
+
+	status = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&ioc);
+	if (status != DLADM_STATUS_OK)
+		goto bail;
+
+	attrp->dia_physlinkid = ioc.ibdioc.ioc_linkid;
+	attrp->dia_portnum = ioc.ibdioc.ioc_portnum;
+	attrp->dia_port_pkey_tbl_sz = ioc.ioc_pkey_tbl_sz;
+	attrp->dia_port_pkeys = ioc.ioc_pkeys;
+	attrp->dia_hca_guid = ioc.ibdioc.ioc_hcaguid;
+	attrp->dia_port_guid = ioc.ibdioc.ioc_portguid;
+	attrp->dia_instance = ioc.ibdioc.ioc_port_inst;
+	return (status);
+bail:
+	free(ioc.ioc_pkeys);
+	return (status);
+}
+
+/* Release the P_Key table that dladm_ib_info() stored in 'attr', if any. */
+void
+dladm_free_ib_info(dladm_ib_attr_t *attr)
+{
+	/* Nothing to release when no attribute block or no table is given. */
+	if (attr == NULL || attr->dia_port_pkeys == NULL)
+		return;
+
+	free(attr->dia_port_pkeys);
+}
+
+/*
+ * Ask the IP over IB driver to instantiate the partition described by 'pattr'
+ * (Phys link, partition link ID, P_Key and driver instance). A set
+ * DLADM_OPT_FORCE bit in dia_flags is passed on as the force-create request.
+ */
+static dladm_status_t
+i_dladm_part_create(dladm_handle_t handle, dladm_part_attr_t *pattr)
+{
+	ibpart_ioctl_t	req;
+
+	bzero(&req, sizeof (req));
+
+	/* Identify the underlying IB Phys link and its driver instance. */
+	req.ibdioc.ioc_linkid = pattr->dia_physlinkid;
+	req.ibdioc.ioc_port_inst = pattr->dia_instance;
+
+	/* Describe the partition object itself. */
+	req.ioc_partid = pattr->dia_partlinkid;
+	req.ioc_pkey = pattr->dia_pkey;
+	req.ioc_force_create = (pattr->dia_flags & DLADM_OPT_FORCE) ? 1 : 0;
+
+	return (i_dladm_ib_ioctl(handle, IBD_CREATE_IBPART,
+	    (ibd_ioctl_t *)&req));
+}
+
+/*
+ * Create an entry in the dladm persistent configuration database for the
+ * partition specified by pattr: its name, link ID, the IB Phys link it was
+ * created over, its P_Key and, if requested, the force-create flag.
+ */
+dladm_status_t
+dladm_part_persist_conf(dladm_handle_t handle, dladm_part_attr_t *pattr)
+{
+	dladm_conf_t	conf;
+	dladm_status_t	status;
+	char		linkover[MAXLINKNAMELEN];
+	uint64_t	u64;
+
+	status = dladm_create_conf(handle, pattr->dia_pname,
+	    pattr->dia_partlinkid, DATALINK_CLASS_PART, DL_IB, &conf);
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * 'conf' is live from here on, so every exit goes through 'done' to
+	 * destroy it. First get the name of the IB Phys link over which this
+	 * partition was created.
+	 */
+	status = dladm_datalink_id2info(handle, pattr->dia_physlinkid,
+	    NULL, NULL, NULL, linkover, sizeof (linkover));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	/* Store IB Phys link name (linkover) */
+	status = dladm_set_conf_field(handle, conf, FLINKOVER, DLADM_TYPE_STR,
+	    linkover);
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * Store the IB Partition's P_Key, widened to the uint64_t field type.
+	 */
+	u64 = pattr->dia_pkey;
+	status = dladm_set_conf_field(handle, conf, FPORTPKEY,
+	    DLADM_TYPE_UINT64, &u64);
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	if (pattr->dia_flags & DLADM_OPT_FORCE) {
+		boolean_t force = B_TRUE;
+
+		/* Store the force create flag. */
+		status = dladm_set_conf_field(handle, conf, FFORCE,
+		    DLADM_TYPE_BOOLEAN, &force);
+		if (status != DLADM_STATUS_OK)
+			goto done;
+	}
+
+	/* Write the assembled entry out; its status is the final result. */
+	status = dladm_write_conf(handle, conf);
+done:
+	dladm_destroy_conf(handle, conf);
+	return (status);
+}
+
+/*
+ * Create a new IB Partition datalink of name 'pname' over the IB Physical link
+ * given in 'physlinkid' with the P_Key 'pkey' and return the datalink ID in
+ * 'partlinkid'. If DLADM_OPT_FORCE is set in 'flags', the partition will be
+ * created even if the P_Key 'pkey' does not exist or if the HCA port
+ * represented by the IB Phys link is down. The configuration is added to the
+ * persistent database only if DLADM_OPT_PERSIST is set. Properties in
+ * 'proplist' are applied last; a failure undoes the steps already completed.
+ */
+dladm_status_t
+dladm_part_create(dladm_handle_t handle, datalink_id_t physlinkid,
+ ib_pkey_t pkey, uint32_t flags, char *pname, datalink_id_t *partlinkid,
+ dladm_arg_list_t *proplist)
+{
+ int i;
+ dladm_status_t status;
+ uint_t media;
+ boolean_t part_created = B_FALSE;
+ boolean_t conf_set = B_FALSE;
+ dladm_phys_attr_t dpa;
+ dladm_part_attr_t pattr;
+
+ pattr.dia_pkey = pkey;
+ pattr.dia_physlinkid = physlinkid; /* IB Phys link's datalink id */
+ pattr.dia_flags = flags;
+ pattr.dia_pname = pname;
+
+ flags &= ~DLADM_OPT_FORCE; /* FORCE is kept only in pattr.dia_flags */
+
+ /*
+ * Check whether the P_Key is valid and, if not, return immediately.
+ * Only full members are allowed as per the IPoIB specification.
+ */
+ if (pattr.dia_pkey <= IB_PKEY_INVALID_FULL)
+ return (DLADM_STATUS_INVALID_PKEY);
+
+ /*
+ * Get the media type of the Phys link datalink ID provided and
+ * make sure that it is InfiniBand media (DL_IB).
+ */
+ if ((status = dladm_datalink_id2info(handle, pattr.dia_physlinkid, NULL,
+ NULL, &media, NULL, 0)) != DLADM_STATUS_OK)
+ return (status);
+
+ if (media != DL_IB)
+ return (dladm_errno2status(ENOTSUP));
+
+ /*
+ * Get the instance number of the IP over IB driver instance that
+ * backs the IB Phys link 'physlinkid' over which we will be creating
+ * our IB partition.
+ */
+ if ((status = dladm_phys_info(handle, pattr.dia_physlinkid, &dpa,
+ DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK)
+ return (status);
+
+ pattr.dia_instance = dladm_ibd_get_instance(dpa.dp_dev);
+ if (pattr.dia_instance == -1)
+ return (DLADM_STATUS_FAILED);
+
+ /* Allocate the partition's datalink ID; 'flags' no longer has FORCE. */
+ if ((status = dladm_create_datalink_id(handle, pattr.dia_pname,
+ DATALINK_CLASS_PART, DL_IB, flags, &pattr.dia_partlinkid)) !=
+ DLADM_STATUS_OK)
+ return (status);
+
+ /*
+ * Create the IB partition object.
+ */
+ status = i_dladm_part_create(handle, &pattr);
+ if (status != DLADM_STATUS_OK)
+ goto done;
+
+ part_created = B_TRUE;
+
+ /*
+ * If the persist flag is set then write this partition information
+ * to the persistent configuration.
+ */
+ if (pattr.dia_flags & DLADM_OPT_PERSIST) {
+ status = dladm_part_persist_conf(handle, &pattr);
+ if (status != DLADM_STATUS_OK)
+ goto done;
+ conf_set = B_TRUE;
+ }
+
+ /*
+ * If a name-value pair list of properties was provided, set those
+ * properties on the new datalink, stopping at the first failure.
+ */
+ if (proplist != NULL) {
+ for (i = 0; i < proplist->al_count; i++) {
+ dladm_arg_info_t *aip = &proplist->al_info[i];
+
+ status = dladm_set_linkprop(handle,
+ pattr.dia_partlinkid, aip->ai_name, aip->ai_val,
+ aip->ai_count, pattr.dia_flags);
+ if (status != DLADM_STATUS_OK)
+ break;
+ }
+ }
+done:
+ if (status != DLADM_STATUS_OK) { /* undo in reverse order of creation */
+ if (conf_set)
+ (void) dladm_remove_conf(handle, pattr.dia_partlinkid);
+ if (part_created)
+ (void) i_dladm_part_delete(handle,
+ pattr.dia_partlinkid);
+ (void) dladm_destroy_datalink_id(handle, pattr.dia_partlinkid,
+ flags);
+ }
+
+ if (partlinkid != NULL) /* written on the failure path as well */
+ *partlinkid = pattr.dia_partlinkid;
+
+ return (status);
+}
+
+/*
+ * Ask the IP over IB driver to tear down the active IB partition whose
+ * datalink ID is 'partid'; the driver's answer is returned as a dladm status.
+ */
+static dladm_status_t
+i_dladm_part_delete(dladm_handle_t handle, datalink_id_t partid)
+{
+	ibpart_ioctl_t	req;
+
+	/* Only the partition's link ID is supplied; the rest stays zeroed. */
+	bzero(&req, sizeof (req));
+	req.ioc_partid = partid;
+
+	return (i_dladm_ib_ioctl(handle, IBD_DELETE_IBPART,
+	    (ibd_ioctl_t *)&req));
+}
+
+/*
+ * Delete the active IB partition 'partid' if 'flags' has DLADM_OPT_ACTIVE; drop
+ * its persistent configuration if 'flags' has DLADM_OPT_PERSIST (0: BADARG).
+ */
+dladm_status_t
+dladm_part_delete(dladm_handle_t handle, datalink_id_t partid, int flags)
+{
+ dladm_status_t status = DLADM_STATUS_OK;
+ datalink_class_t class;
+
+ if (flags == 0)
+ return (DLADM_STATUS_BADARG);
+
+ /*
+ * Make sure that the datalink ID provided is an IB partition class
+ * datalink ID; a failed lookup is reported as a bad argument too.
+ */
+ if ((dladm_datalink_id2info(handle, partid, NULL, &class, NULL, NULL, 0)
+ != DLADM_STATUS_OK))
+ return (DLADM_STATUS_BADARG);
+
+ if (class != DATALINK_CLASS_PART)
+ return (DLADM_STATUS_BADARG);
+
+ if ((flags & DLADM_OPT_ACTIVE) != 0) {
+ status = i_dladm_part_delete(handle, partid);
+ if (status == DLADM_STATUS_OK) {
+ (void) dladm_set_linkprop(handle, partid, NULL, NULL, 0,
+ DLADM_OPT_ACTIVE);
+ (void) dladm_destroy_datalink_id(handle, partid,
+ DLADM_OPT_ACTIVE);
+ } else if (status != DLADM_STATUS_NOTFOUND ||
+ !(flags & DLADM_OPT_PERSIST)) {
+ return (status); /* NOTFOUND is tolerated only with PERSIST */
+ }
+ }
+
+ if ((flags & DLADM_OPT_PERSIST) != 0) {
+ dladm_status_t db_status;
+ db_status = dladm_remove_conf(handle, partid);
+
+ /*
+ * A partition could have been temporarily deleted in which
+ * case the delete of the active partition above would have
+ * failed with NOTFOUND. Only then is the status of removing
+ * the persistent database entry returned to the caller; in
+ * every other case db_status is not reported.
+ */
+ if (status == DLADM_STATUS_NOTFOUND)
+ status = db_status;
+
+ (void) dladm_destroy_datalink_id(handle, partid,
+ DLADM_OPT_PERSIST);
+ }
+
+ return (status);
+}
+
+/*
+ * Call into the IP over IB driver to create the active instance of the IB
+ * partition 'plinkid' from its persistent configuration. Usable as a datalink
+ * walker callback: a partition that cannot be brought up is simply skipped.
+ */
+/* ARGSUSED */
+static int
+i_dladm_part_up(dladm_handle_t handle, datalink_id_t plinkid, void *arg)
+{
+	dladm_conf_t		conf;
+	datalink_id_t		linkid;
+	uint64_t		u64;
+	char			linkover[MAXLINKNAMELEN];
+	dladm_status_t		status;
+	dladm_phys_attr_t	dpa;
+	dladm_part_attr_t	pattr;
+
+	/*
+	 * plinkid is the IB partition datalink's ID. Get a handle to the
+	 * persistent configuration entry for this datalink ID. If there is
+	 * none, there is nothing to bring up and no handle to destroy.
+	 */
+	if (dladm_read_conf(handle, plinkid, &conf) != DLADM_STATUS_OK)
+		return (DLADM_WALK_CONTINUE);
+
+	/*
+	 * Get the name of the IB Phys link over which this partition was
+	 * created.
+	 */
+	status = dladm_get_conf_field(handle, conf, FLINKOVER, linkover,
+	    sizeof (linkover));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	if ((status = dladm_name2info(handle, linkover, &linkid, NULL, NULL,
+	    NULL)) != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * Get the phys attribute of the IB Phys link to get the device name
+	 * associated with the phys link. We need this to get the IP over IB
+	 * driver instance number.
+	 */
+	if (dladm_phys_info(handle, linkid, &dpa, DLADM_OPT_ACTIVE)
+	    != DLADM_STATUS_OK)
+		goto done;
+
+	/* Get the IB partition's P_Key */
+	status = dladm_get_conf_field(handle, conf, FPORTPKEY, &u64,
+	    sizeof (u64));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * We always set the force flag during dladm_part_up because we want
+	 * the partition creation to succeed even if the IB HCA port over which
+	 * the partition is being created is still down. Since dladm_part_up
+	 * is usually invoked during early boot sequence, it is possible under
+	 * some IB subnet configurations for dladm_part_up to be called before
+	 * the IB link negotiation is completed and port state is set to active
+	 * and P_Key table is updated.
+	 */
+	bzero(&pattr, sizeof (pattr));
+	pattr.dia_flags = DLADM_OPT_FORCE | DLADM_OPT_ACTIVE |
+	    DLADM_OPT_PERSIST;
+	pattr.dia_physlinkid = linkid;		/* IB Phys link's datalink ID */
+	pattr.dia_partlinkid = plinkid;		/* IB Partition's datalink ID */
+	pattr.dia_pkey = (ib_pkey_t)u64;
+	pattr.dia_instance = dladm_ibd_get_instance(dpa.dp_dev);
+	if (pattr.dia_instance == -1)
+		goto done;	/* not a bare return: 'conf' must be destroyed */
+
+	/*
+	 * Create the active IB Partition object. If its datalink ID cannot be
+	 * brought up afterwards, delete the partition just created; it is
+	 * identified by plinkid, not by the Phys link's linkid.
+	 */
+	if (i_dladm_part_create(handle, &pattr) == DLADM_STATUS_OK &&
+	    dladm_up_datalink_id(handle, plinkid) != DLADM_STATUS_OK)
+		(void) i_dladm_part_delete(handle, plinkid);
+
+done:
+	dladm_destroy_conf(handle, conf);
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * Bring up the persistent IB partition 'linkid', or every persistent IB
+ * partition when 'linkid' is DATALINK_ALL_LINKID. 'flags' is not used.
+ */
+/* ARGSUSED */
+dladm_status_t
+dladm_part_up(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags)
+{
+	dladm_status_t	result = DLADM_STATUS_OK;
+
+	if (linkid != DATALINK_ALL_LINKID) {
+		/* One link: 'result' changes only if the callback writes it. */
+		(void) i_dladm_part_up(handle, linkid, &result);
+		return (result);
+	}
+
+	/* All partitions: the walk's outcome is never reported as failure. */
+	(void) dladm_walk_datalink_id(i_dladm_part_up, handle, &result,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST);
+	return (DLADM_STATUS_OK);
+}
+
+static int
+dladm_ibd_get_instance(char *devname)
+{
+	char	*end;
+	long	inst;
+	/* Instance digits follow the 3-char driver name; -1 if malformed. */
+	if (devname == NULL || strlen(devname) <= strlen("ibp"))
+		return (-1);
+	errno = 0;
+	inst = strtol(devname + strlen("ibp"), &end, 10);
+	if (errno != 0 || *end != '\0' || inst < 0 || inst != (int)inst)
+		return (-1);
+	return ((int)inst);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libdladm/common/libdlib.h Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _LIBDLIB_H
+#define _LIBDLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+
+#define MAXPKEYSTRSZ 968
+#define MAXPKEYLEN 6
+#define IBGUIDSTRLEN 16
+#define IBPORTSTRLEN 5
+
+#define DLADM_IBPART_FORCE_CREATE 0x1
+
+typedef struct dladm_ib_attr_s { /* used for IB Phys links and partitions */
+ datalink_id_t dia_physlinkid; /* IB Phys link datalink ID */
+ datalink_id_t dia_partlinkid; /* IB Partition datalink ID */
+ ib_pkey_t dia_pkey; /* IB partition's P_Key */
+ uint32_t dia_flags; /* DLADM_OPT_* on create; DLADM_IBPART_* from info */
+ char *dia_devname; /* IB Phys link's device name */
+ char *dia_pname; /* IB partition's name */
+ uint_t dia_portnum; /* IB Phys link's HCA port number */
+ int dia_instance; /* IP over IB driver instance number */
+ ib_guid_t dia_hca_guid; /* IB HCA GUID */
+ ib_guid_t dia_port_guid; /* IB HCA Port GUID */
+ uint_t dia_port_pkey_tbl_sz; /* Number of entries in dia_port_pkeys */
+ ib_pkey_t *dia_port_pkeys; /* Ptr to the P_Key table */
+} dladm_ib_attr_t;
+
+typedef struct dladm_ib_attr_s dladm_part_attr_t;
+
+typedef enum { /* values of the "linkmode" link property */
+ DLADM_IBPART_UD_MODE = 0, /* shown as "ud" */
+ DLADM_IBPART_CM_MODE /* shown as "cm" */
+} dladm_ibpart_linkmode_t;
+
+extern dladm_status_t dladm_part_create(dladm_handle_t, datalink_id_t,
+ ib_pkey_t, uint32_t, char *, datalink_id_t *, dladm_arg_list_t *);
+extern dladm_status_t dladm_part_delete(dladm_handle_t, datalink_id_t, int);
+extern dladm_status_t dladm_part_up(dladm_handle_t, datalink_id_t, uint32_t);
+extern dladm_status_t dladm_part_info(dladm_handle_t, datalink_id_t,
+ dladm_part_attr_t *, uint32_t);
+extern dladm_status_t dladm_ib_info(dladm_handle_t, datalink_id_t,
+ dladm_ib_attr_t *, uint32_t);
+extern void dladm_free_ib_info(dladm_ib_attr_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBDLIB_H */
--- a/usr/src/lib/libdladm/common/libdllink.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdllink.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
@@ -40,6 +39,7 @@
#include <libdlaggr.h>
#include <libdlvlan.h>
#include <libdlvnic.h>
+#include <libdlib.h>
#include <libdllink.h>
#include <libdlmgmt.h>
#include <libdladm_impl.h>
@@ -691,6 +691,22 @@
}
static int
+i_dladm_part_link_del(dladm_handle_t handle, datalink_id_t partid, void *arg)
+{
+ consumer_del_phys_arg_t *del_arg = arg;
+ dladm_part_attr_t pinfo;
+ dladm_status_t status;
+
+ status = dladm_part_info(handle, partid, &pinfo, DLADM_OPT_PERSIST);
+ if (status != DLADM_STATUS_OK)
+ return (DLADM_WALK_CONTINUE);
+
+ if (pinfo.dia_physlinkid == del_arg->linkid)
+ (void) dladm_part_delete(handle, partid, DLADM_OPT_PERSIST);
+ return (DLADM_WALK_CONTINUE);
+}
+
+static int
i_dladm_aggr_link_del(dladm_handle_t handle, datalink_id_t aggrid, void *arg)
{
consumer_del_phys_arg_t *del_arg = arg;
@@ -766,6 +782,10 @@
(void) dladm_walk_datalink_id(i_dladm_vlan_link_del, handle,
&del_arg, DATALINK_CLASS_VLAN, DATALINK_ANY_MEDIATYPE,
DLADM_OPT_PERSIST);
+ } else if (media == DL_IB) {
+ del_arg.linkid = linkid;
+ (void) dladm_walk_datalink_id(i_dladm_part_link_del, handle,
+ &del_arg, DATALINK_CLASS_PART, DL_IB, DLADM_OPT_PERSIST);
}
(void) dladm_remove_conf(handle, linkid);
--- a/usr/src/lib/libdladm/common/linkprop.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/linkprop.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <stdlib.h>
@@ -42,6 +41,7 @@
#include <libdlwlan.h>
#include <libdlvlan.h>
#include <libdlvnic.h>
+#include <libdlib.h>
#include <libintl.h>
#include <dlfcn.h>
#include <link.h>
@@ -150,7 +150,7 @@
get_bridge_pvid, get_protection, get_rxrings,
get_txrings, get_cntavail,
get_allowedips, get_allowedcids, get_pool,
- get_rings_range;
+ get_rings_range, get_linkmode_prop;
static pd_setf_t set_zone, set_rate, set_powermode, set_radio,
set_public_prop, set_resource, set_stp_prop,
@@ -360,6 +360,8 @@
{ MAC_PROP_MAX_TXHWCLNT_AVAIL, sizeof (uint_t), "txhwclnt-available"},
+ { MAC_PROP_IB_LINKMODE, sizeof (uint32_t), "linkmode"},
+
{ MAC_PROP_PRIVATE, 0, "driver-private"}
};
@@ -431,6 +433,11 @@
{ "auto", P2P_AUTO }
};
+static val_desc_t dladm_ibpart_linkmode_vals[] = {
+ { "cm", DLADM_IBPART_CM_MODE },
+ { "ud", DLADM_IBPART_UD_MODE },
+};
+
#define VALCNT(vals) (sizeof ((vals)) / sizeof (val_desc_t))
#define RESET_VAL ((uintptr_t)-1)
#define UNSPEC_VAL ((uintptr_t)-2)
@@ -453,6 +460,11 @@
get_radio, NULL, 0,
DATALINK_CLASS_PHYS, DL_WIFI },
+ { "linkmode", { "cm", DLADM_IBPART_CM_MODE },
+ dladm_ibpart_linkmode_vals, VALCNT(dladm_ibpart_linkmode_vals),
+ set_public_prop, NULL, get_linkmode_prop, NULL, 0,
+ DATALINK_CLASS_PART, DL_IB },
+
{ "speed", { "", 0 }, NULL, 0,
set_rate, get_rate_mod,
get_rate, check_rate, 0,
@@ -4436,3 +4448,35 @@
free(buf);
return (status);
}
+
+/* ARGSUSED */
+static dladm_status_t
+get_linkmode_prop(dladm_handle_t handle, prop_desc_t *pdp,
+    datalink_id_t linkid, char **prop_val, uint_t *val_cnt,
+    datalink_media_t media, uint_t flags, uint_t *perm_flags)
+{
+	const char	*mode_name = "";
+	uint32_t	mode;
+	dladm_status_t	rc;
+
+	/*
+	 * Property getter for the IB partition "linkmode" property: fetch the
+	 * numeric mode through the public-property path, then report it as a
+	 * single string value in prop_val[0].
+	 */
+	rc = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags,
+	    perm_flags, &mode, sizeof (mode));
+	if (rc != DLADM_STATUS_OK)
+		return (rc);
+
+	/* Modes other than CM and UD are shown as an empty string. */
+	if (mode == DLADM_IBPART_CM_MODE)
+		mode_name = "cm";
+	else if (mode == DLADM_IBPART_UD_MODE)
+		mode_name = "ud";
+
+	(void) snprintf(prop_val[0], DLADM_STRSIZE, "%s", mode_name);
+	*val_cnt = 1;
+
+	return (DLADM_STATUS_OK);
+}
--- a/usr/src/lib/libdladm/common/llib-ldladm Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/llib-ldladm Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*LINTLIBRARY*/
@@ -31,6 +30,7 @@
#include <libdlwlan.h>
#include <libdlvnic.h>
#include <libdlvlan.h>
+#include <libdlib.h>
#include <libdliptun.h>
#include <libdlmgmt.h>
#include <libdlflow.h>
--- a/usr/src/lib/libdladm/common/mapfile-vers Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/mapfile-vers Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
#
#
@@ -256,6 +255,13 @@
dladm_flow_stat_free;
dladm_flow_stat_query_all;
dladm_flow_stat_query_all_free;
+
+ dladm_part_create;
+ dladm_part_delete;
+ dladm_part_info;
+ dladm_part_up;
+ dladm_ib_info;
+ dladm_free_ib_info;
local:
*;
};
--- a/usr/src/lib/udapl/udapl_tavor/Makefile.com Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/Makefile.com Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
LIBRARY= udapl_tavor.a
@@ -133,7 +132,7 @@
include $(SRC)/lib/Makefile.lib
LIBS = $(DYNLIB)
-LDLIBS += -ldevinfo -lsocket -lnsl -ldat -lc
+LDLIBS += -ldevinfo -lsocket -lnsl -ldat -lc -ldladm
SRCDIR = ../common
TAVORSRCDIR = ../tavor
--- a/usr/src/lib/udapl/udapl_tavor/common/dapl_name_service.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/common/dapl_name_service.c Wed Apr 14 10:26:18 2010 -0700
@@ -24,8 +24,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -299,7 +298,6 @@
* DAT_INTERNAL_ERROR
*/
-#define IBD_NAME "ibd"
#define NS_MAX_RETRIES 60
DAT_RETURN
@@ -464,9 +462,7 @@
}
(void) dapl_os_memzero(&lifr, sizeof (lifr));
(void) dapl_os_memcpy(&lifr.lifr_nd.lnr_addr, addr, sizeof (*addr));
- (void) dapl_os_strcpy(lifr.lifr_name, IBD_NAME);
- (void) sprintf(&lifr.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
- ia_ptr->hca_ptr->hca_ibd_inst);
+ (void) dapl_os_strcpy(lifr.lifr_name, ia_ptr->hca_ptr->name);
again:;
if (ioctl(s, SIOCLIFGETND, (caddr_t)&lifr) < 0) {
@@ -550,9 +546,7 @@
int retval;
uint32_t netmask, netaddr, netaddr_dest;
- (void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
- (void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
- ia_ptr->hca_ptr->hca_ibd_inst);
+ (void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
if (retval < 0) {
@@ -597,9 +591,7 @@
uchar_t *netmask, *local_addr, *dest_addr;
int i, retval;
- (void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
- (void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
- ia_ptr->hca_ptr->hca_ibd_inst);
+ (void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
if (retval < 0) {
--- a/usr/src/lib/udapl/udapl_tavor/include/dapl.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/include/dapl.h Wed Apr 14 10:26:18 2010 -0700
@@ -24,8 +24,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -327,7 +326,6 @@
ib_uint32_t partition_max;
ib_uint32_t partition_key;
ib_uint32_t tavor_idx;
- ib_uint32_t hca_ibd_inst;
ib_guid_t node_GUID;
ib_lid_t lid;
int max_inline_send;
--- a/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hca.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hca.c Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
@@ -36,14 +35,18 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
-#include <libdevinfo.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
#include "dapl.h"
#include "dapl_adapter_util.h"
#include "dapl_tavor_ibtf_impl.h"
#include "dapl_hca_util.h"
#include "dapl_name_service.h"
-#define IF_NAME "ibd"
#define MAX_HCAS 64
#define PROP_HCA_GUID "hca-guid"
#define PROP_PORT_NUM "port-number"
@@ -52,9 +55,9 @@
#define DEVDAPLT "/dev/daplt"
/* function prototypes */
-static DAT_RETURN dapli_process_tavor_node(di_node_t node, int *hca_idx,
+static DAT_RETURN dapli_process_tavor_node(char *dev_path, int *hca_idx,
int try_blueflame);
-static DAT_RETURN dapli_process_ibd_node(di_node_t node, DAPL_HCA *hca_ptr,
+static DAT_RETURN dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr,
int hca_idx);
#if defined(IBHOSTS_NAMING)
@@ -70,63 +73,75 @@
dapli_init_hca(
IN DAPL_HCA *hca_ptr)
{
- di_node_t root_node;
- di_node_t hca_node;
- di_node_t ibd_node;
- DAT_RETURN dat_status = DAT_SUCCESS;
- int hca_idx = 0;
- int ia_instance;
- int check_for_bf = 0;
+ DAT_RETURN dat_status = DAT_SUCCESS;
+ int hca_idx = 0;
+ int check_for_bf = 0;
+ datalink_class_t class;
+ datalink_id_t linkid;
+ dladm_ib_attr_t ib_attr;
+ ibnex_ctl_query_hca_t query_hca;
+ int ibnex_fd = -1;
+ dladm_handle_t dlh;
+ char hca_device_path[MAXPATHLEN];
- ia_instance = (int)dapl_os_strtol(hca_ptr->name + strlen(IF_NAME),
- NULL, 0);
-
- root_node = di_init("/", DINFOCPYALL);
- if (root_node == DI_NODE_NIL) {
+ if (dladm_open(&dlh) != DLADM_STATUS_OK) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "init_hca: di_init failed %s\n", strerror(errno));
+ "init_hca: dladm_open failed\n");
return (DAT_INTERNAL_ERROR);
}
- ibd_node = di_drv_first_node(IF_NAME, root_node);
- while (ibd_node != DI_NODE_NIL) {
- /* find the ibd node matching our ianame */
- if (di_instance(ibd_node) == ia_instance) {
- break;
- }
- ibd_node = di_drv_next_node(ibd_node);
- }
-
- if (ibd_node == DI_NODE_NIL) {
+ if ((ibnex_fd = open(IBNEX_DEVCTL_DEV, O_RDONLY)) < 0) {
dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "init_hcas: ibd%d di_node not found\n", ia_instance);
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "init_hca: could not open ib nexus (%s)\n",
+ strerror(errno));
goto bail;
}
- hca_node = di_parent_node(ibd_node);
- if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name(hca_node),
- "tavor", strlen("tavor")) == 0))
+ if ((dladm_name2info(dlh, hca_ptr->name, &linkid, NULL, &class,
+ NULL) != DLADM_STATUS_OK) ||
+ (class != DATALINK_CLASS_PART) ||
+ (dladm_part_info(dlh, linkid, &ib_attr,
+ DLADM_OPT_ACTIVE) != DLADM_STATUS_OK)) {
+ dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "init_hca: %s not found - couldn't get partition info\n",
+ hca_ptr->name);
+ goto bail;
+ }
+
+ bzero(&query_hca, sizeof (query_hca));
+ query_hca.hca_guid = ib_attr.dia_hca_guid;
+ query_hca.hca_device_path = hca_device_path;
+ query_hca.hca_device_path_alloc_sz = sizeof (hca_device_path);
+ if (ioctl(ibnex_fd, IBNEX_CTL_QUERY_HCA, &query_hca) == -1) {
+ dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "init_hca: %s not found; query_hca failed\n",
+ hca_ptr->name);
+ goto bail;
+ }
+
+ if (strcmp(query_hca.hca_info.hca_driver_name, "tavor") == 0)
dapls_init_funcs_tavor(hca_ptr);
- else if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name
- (hca_node), "arbel", strlen("arbel")) == 0))
+ else if (strcmp(query_hca.hca_info.hca_driver_name, "arbel") == 0)
dapls_init_funcs_arbel(hca_ptr);
- else if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name
- (hca_node), "hermon", strlen("hermon")) == 0)) {
+ else if (strcmp(query_hca.hca_info.hca_driver_name, "hermon") == 0) {
dapls_init_funcs_hermon(hca_ptr);
check_for_bf = 1;
} else {
dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "init_hcas: ibd%d hca_node not found\n", ia_instance);
+ "init_hca: %s not found\n", hca_ptr->name);
goto bail;
}
- dat_status = dapli_process_tavor_node(hca_node, &hca_idx, check_for_bf);
+ dat_status = dapli_process_tavor_node(hca_device_path, &hca_idx,
+ check_for_bf);
if (dat_status != DAT_SUCCESS) {
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "init_hcas: ibd%d process_tavor_node failed(0x%x)\n",
- ia_instance, dat_status);
+ "init_hcas: %s process_tavor_node failed(0x%x)\n",
+ hca_ptr->name, dat_status);
goto bail;
}
@@ -136,27 +151,28 @@
dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
}
#else
- dat_status = dapli_process_ibd_node(ibd_node, hca_ptr, hca_idx);
+ dat_status = dapli_process_ia(&ib_attr, hca_ptr, hca_idx);
#endif
if (dat_status != DAT_SUCCESS) {
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "init_hcas: ibd%d process_ibd_node failed(0x%x)\n",
- ia_instance, dat_status);
+ "init_hcas: %s process_ia failed(0x%x)\n",
+ hca_ptr->name, dat_status);
goto bail;
}
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "init_hcas: done ibd%d\n", ia_instance);
+ "init_hcas: done %s\n", hca_ptr->name);
bail:
- di_fini(root_node);
+ if (ibnex_fd != -1)
+ (void) close(ibnex_fd);
+ dladm_close(dlh);
return (dat_status);
}
static DAT_RETURN
-dapli_process_tavor_node(di_node_t node, int *hca_idx, int try_blueflame)
+dapli_process_tavor_node(char *dev_path, int *hca_idx, int try_blueflame)
{
- char *dev_path;
char path_buf[MAXPATHLEN];
int i, idx, fd;
#ifndef _LP64
@@ -183,7 +199,6 @@
dapl_os_unlock(&g_tavor_state_lock);
return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
}
- dev_path = di_devfs_path(node);
for (i = 0; i < idx; i++) {
if (strcmp(dev_path, g_tavor_state[i].hca_path) == 0) {
@@ -199,7 +214,6 @@
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
"process_tavor: devfs path %s is too long\n",
dev_path);
- di_devfs_path_free(dev_path);
dapl_os_unlock(&g_tavor_state_lock);
return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
}
@@ -207,7 +221,6 @@
(void) dapl_os_strcat(path_buf, dev_path);
(void) dapl_os_strcat(path_buf, ":devctl");
(void) dapl_os_strcpy(g_tavor_state[idx].hca_path, dev_path);
- di_devfs_path_free(dev_path);
pagesize = (size_t)sysconf(_SC_PAGESIZE);
if (pagesize == 0) {
@@ -294,49 +307,18 @@
}
static DAT_RETURN
-dapli_process_ibd_node(di_node_t node, DAPL_HCA *hca_ptr, int hca_idx)
+dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr, int hca_idx)
{
- di_prop_t prop;
- ib_guid_t hca_guid = 0;
struct lifreq lifreq;
- uint32_t port_num = 0;
- uint32_t partition_key = 0;
- int instance, sfd, retval, af;
- int tmp;
- int digits;
- char *drv_name;
+ int sfd, retval, af;
char addr_buf[64];
- prop = di_prop_next(node, DI_PROP_NIL);
- while (prop != DI_PROP_NIL) {
- char *prop_name;
- uchar_t *bytep;
- int *intp, count;
-
- prop_name = di_prop_name(prop);
- count = 0;
-
- if (strcmp(prop_name, PROP_HCA_GUID) == 0) {
- count = di_prop_bytes(prop, &bytep);
- dapl_os_assert(count == sizeof (ib_guid_t));
- (void) dapl_os_memcpy((void *)&hca_guid, (void *)bytep,
- sizeof (ib_guid_t));
- } else if (strcmp(prop_name, PROP_PORT_NUM) == 0) {
- count = di_prop_ints(prop, &intp);
- dapl_os_assert(count == 1);
- port_num = (uint32_t)intp[0];
- } else if (strcmp(prop_name, PROP_PORT_PKEY) == 0) {
- count = di_prop_ints(prop, &intp);
- dapl_os_assert(count == 1);
- partition_key = (uint32_t)intp[0];
- }
- prop = di_prop_next(node, prop);
- }
- if (hca_guid == 0 || port_num == 0 || partition_key == 0) {
+ if (ib_attr->dia_hca_guid == 0 || ib_attr->dia_portnum == 0 ||
+ ib_attr->dia_pkey == 0) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "process_ibd: invalid properties: guid 0x%016llx, "
- "port %d, pkey 0x%08x\n", hca_guid, port_num,
- partition_key);
+ "process_ia: invalid properties: guid 0x%016llx, "
+ "port %d, pkey 0x%08x\n", ib_attr->dia_hca_guid,
+ ib_attr->dia_portnum, (uint_t)ib_attr->dia_pkey);
return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
}
@@ -349,31 +331,20 @@
sfd = socket(af, SOCK_DGRAM, 0);
if (sfd < 0) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "process_ibd: socket failed: %s\n", strerror(errno));
+ "process_ia: socket failed: %s\n", strerror(errno));
return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
}
- instance = di_instance(node);
- drv_name = di_driver_name(node);
- /* calculate the number of digits in instance */
- tmp = instance;
- digits = 0;
- do {
- tmp = tmp / 10;
- digits++;
- } while (tmp > 0);
/* check if name will fit in lifr_name */
- if (dapl_os_strlen(drv_name) + digits + 1 > LIFNAMSIZ) {
+ if (dapl_os_strlen(hca_ptr->name) >= LIFNAMSIZ) {
(void) close(sfd);
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "process_ibd: if name overflow %s:%d\n",
- drv_name, instance);
+ "process_ia: if name overflow %s\n",
+ hca_ptr->name);
return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
}
- (void) dapl_os_strcpy(lifreq.lifr_name, drv_name);
- (void) sprintf(&lifreq.lifr_name[dapl_os_strlen(drv_name)], "%d",
- instance);
+ (void) dapl_os_strcpy(lifreq.lifr_name, hca_ptr->name);
retval = ioctl(sfd, SIOCGLIFADDR, (caddr_t)&lifreq);
if (retval < 0) {
(void) close(sfd);
@@ -382,7 +353,7 @@
* the interface is not plumbed.
*/
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "process_ibd: %s: ip address not found\n",
+ "process_ia: %s: ip address not found\n",
lifreq.lifr_name);
return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
} else {
@@ -396,19 +367,18 @@
}
(void) close(sfd);
- hca_ptr->hca_ibd_inst = instance;
hca_ptr->tavor_idx = hca_idx;
- hca_ptr->node_GUID = hca_guid;
- hca_ptr->port_num = port_num;
- hca_ptr->partition_key = partition_key;
+ hca_ptr->node_GUID = ib_attr->dia_hca_guid;
+ hca_ptr->port_num = ib_attr->dia_portnum;
+ hca_ptr->partition_key = ib_attr->dia_pkey;
(void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
(void *)&lifreq.lifr_addr, sizeof (hca_ptr->hca_address));
hca_ptr->max_inline_send = dapls_tavor_max_inline();
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "process_ibd: interface %s, hca guid 0x%016llx, port %d, "
- "pkey 0x%08x, ip addr %s\n", lifreq.lifr_name, hca_guid,
- port_num, partition_key, dapls_inet_ntop(
+ "process_ia: interface %s, hca guid 0x%016llx, port %d, "
+ "pkey 0x%08x, ip addr %s\n", lifreq.lifr_name, hca_ptr->node_GUID,
+ hca_ptr->port_num, hca_ptr->partition_key, dapls_inet_ntop(
(struct sockaddr *)&hca_ptr->hca_address, addr_buf, 64));
return (DAT_SUCCESS);
}
@@ -674,7 +644,6 @@
(void) sprintf(line_buf, "%s-ib%d", localhost, count + 1);
if (strncmp(line_buf, host_buf, strlen(line_buf)) == 0) {
guid &= 0xfffffffffffffff0;
- hca_ptr->hca_ibd_inst = count + 1;
hca_ptr->tavor_idx = hca_idx;
hca_ptr->node_GUID = guid;
hca_ptr->port_num = count + 1;
--- a/usr/src/pkg/manifests/SUNWcs.mf Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/SUNWcs.mf Wed Apr 14 10:26:18 2010 -0700
@@ -1171,6 +1171,7 @@
$(sparc_ONLY)file path=usr/lib/rcm/modules/SUNW_ttymux_rcm.so mode=0555
file path=usr/lib/rcm/modules/SUNW_vlan_rcm.so mode=0555
file path=usr/lib/rcm/modules/SUNW_vnic_rcm.so mode=0555
+file path=usr/lib/rcm/modules/SUNW_ibpart_rcm.so mode=0555
file path=usr/lib/rcm/rcm_daemon mode=0555
file path=usr/lib/reparse/reparsed group=sys mode=0555
file path=usr/lib/saf/listen group=sys mode=0755
--- a/usr/src/pkg/manifests/driver-network-ib.mf Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/driver-network-ib.mf Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
#
#
@@ -45,7 +44,7 @@
file path=kernel/drv/$(ARCH64)/ib group=sys
$(i386_ONLY)file path=kernel/drv/ib group=sys
file path=kernel/drv/ib.conf group=sys original_name=SUNWib:kernel/drv/ib.conf \
- preserve=true reboot-needed=false
+ preserve=renameold reboot-needed=false
file path=kernel/misc/$(ARCH64)/ibcm group=sys mode=0755 reboot-needed=true
file path=kernel/misc/$(ARCH64)/ibdm group=sys mode=0755 reboot-needed=true
file path=kernel/misc/$(ARCH64)/ibmf group=sys mode=0755 reboot-needed=true
--- a/usr/src/pkg/manifests/driver-network-ibd.mf Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/driver-network-ibd.mf Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
#
#
@@ -30,24 +29,8 @@
# will only be installed into the global zone.
#
<include hollow_zone_pkg>
-set name=pkg.fmri value=pkg:/driver/network/ibd@$(PKGVERS)
-set name=pkg.description value="Sun IP over InfiniBand"
-set name=pkg.summary value="Sun IP over InfiniBand"
-set name=info.classification value=org.opensolaris.category.2008:System/Hardware
+set name=pkg.fmri value=pkg:/driver/network/[email protected],5.11-0.139
+set name=pkg.renamed value=true
set name=variant.arch value=$(ARCH)
set name=variant.opensolaris.zone value=global value=nonglobal
-dir path=kernel group=sys
-dir path=kernel/drv group=sys
-dir path=kernel/drv/$(ARCH64) group=sys
-driver name=ibd alias=ib.ipib clone_perms="ibd 0666 root sys" \
- perms="* 0666 root sys"
-file path=kernel/drv/$(ARCH64)/ibd group=sys
-$(i386_ONLY)file path=kernel/drv/ibd group=sys
-file path=kernel/drv/ibd.conf group=sys \
- original_name=SUNWipoib:kernel/drv/ibd.conf preserve=renamenew
-legacy pkg=SUNWipoib arch=$(ARCH) category=system desc="Sun IP over InfiniBand" \
- hotline="Please contact your local service provider" \
- name="Sun IP over InfiniBand" vendor="Sun Microsystems, Inc." \
- version=11.11,REV=2009.11.11
-license cr_Sun license=cr_Sun
-license lic_CDDL license=lic_CDDL
+depend fmri=pkg:/driver/network/[email protected],5.11-0.139 type=require
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkg/manifests/driver-network-ibp.mf Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include hollow_zone_pkg>
+set name=pkg.fmri value=pkg:/driver/network/ibp@$(PKGVERS)
+set name=pkg.description value="Sun IP over InfiniBand"
+set name=pkg.summary value="Sun IP over InfiniBand"
+set name=info.classification value=org.opensolaris.category.2008:System/Hardware
+set name=variant.arch value=$(ARCH)
+set name=variant.opensolaris.zone value=global value=nonglobal
+
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+driver name=ibp alias=ib.ipib clone_perms="ibp 0666 root sys" \
+ perms="* 0666 root sys"
+file path=kernel/drv/$(ARCH64)/ibp group=sys
+$(i386_ONLY)file path=kernel/drv/ibp group=sys
+file path=kernel/drv/ibp.conf group=sys \
+ original_name=SUNWipoib:kernel/drv/ibd.conf preserve=renameold
+
+dir path=lib
+dir path=lib/svc
+dir path=lib/svc/method
+file path=lib/svc/method/ibd-post-upgrade mode=0555
+dir path=lib/svc/manifest group=sys
+dir path=lib/svc/manifest/network group=sys
+file path=lib/svc/manifest/network/ibd-post-upgrade.xml group=sys mode=0444
+
+dir path=sbin group=sys
+file path=sbin/ibd_upgrade mode=0555
+file path=sbin/ibd_delete_link mode=0555
+
+legacy pkg=SUNWipoib arch=$(ARCH) category=system desc="Sun IP over InfiniBand" \
+ hotline="Please contact your local service provider" \
+ name="Sun IP over InfiniBand" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
--- a/usr/src/pkg/manifests/system-header.mf Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/system-header.mf Wed Apr 14 10:26:18 2010 -0700
@@ -474,6 +474,7 @@
file path=usr/include/libdllink.h
file path=usr/include/libdlpi.h
file path=usr/include/libdlvlan.h
+file path=usr/include/libdlib.h
file path=usr/include/libelf.h
$(i386_ONLY)file path=usr/include/libfdisk.h
file path=usr/include/libfstyp.h
@@ -1071,6 +1072,7 @@
$(i386_ONLY)file path=usr/include/sys/i8272A.h
file path=usr/include/sys/ia.h
file path=usr/include/sys/iapriocntl.h
+file path=usr/include/sys/ibpart.h
file path=usr/include/sys/ib/adapters/hermon/hermon_ioctl.h
file path=usr/include/sys/ib/adapters/tavor/tavor_ioctl.h
file path=usr/include/sys/ib/clients/ibd/ibd.h
--- a/usr/src/tools/scripts/bfu.sh Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/tools/scripts/bfu.sh Wed Apr 14 10:26:18 2010 -0700
@@ -21,8 +21,7 @@
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
#
# Upgrade a machine from a cpio archive area in about 5 minutes.
# By Roger Faulkner and Jeff Bonwick, April 1993.
@@ -7824,6 +7823,16 @@
rm -f $usr/include/sys/pcmcia/pcelx.h
#
+ # Remove the old ibd driver. It is replaced by the ibp driver.
+ #
+ rm -f $root/kernel/drv/ibd
+ rm -f $root/kernel/drv/amd64/ibd
+ rm -f $root/kernel/drv/sparcv9/ibd
+ if [ -f $root/kernel/drv/ibd.conf ]; then
+ mv $root/kernel/drv/ibd.conf $root/kernel/drv/ibp.conf.old
+ fi
+
+ #
# Remove bpp, esp, and dma
#
rm -f $root/kernel/drv/sparcv9/bpp
--- a/usr/src/uts/common/Makefile.files Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/Makefile.files Wed Apr 14 10:26:18 2010 -0700
@@ -663,7 +663,7 @@
SIMNET_OBJS += simnet.o
-IB_OBJS += ibnex.o ibnex_ioctl.o
+IB_OBJS += ibnex.o ibnex_ioctl.o ibnex_hca.o
IBCM_OBJS += ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o \
ibcm_arp.o ibcm_arp_link.o
@@ -679,7 +679,7 @@
IBTL_OBJS += ibtl_impl.o ibtl_util.o ibtl_mem.o ibtl_handlers.o ibtl_qp.o \
ibtl_cq.o ibtl_wr.o ibtl_hca.o ibtl_chan.o ibtl_cm.o \
- ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o
+ ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o ibtl_misc.o
TAVOR_OBJS += tavor.o tavor_agents.o tavor_cfg.o tavor_ci.o tavor_cmd.o \
tavor_cq.o tavor_event.o tavor_ioctl.o tavor_misc.o \
--- a/usr/src/uts/common/io/dld/dld_drv.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/dld/dld_drv.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -1373,7 +1372,8 @@
{VNIC_IOC, "vnic", 0, NULL, 0},
{SIMNET_IOC, "simnet", 0, NULL, 0},
{BRIDGE_IOC, "bridge", 0, NULL, 0},
- {IPTUN_IOC, "iptun", 0, NULL, 0}
+ {IPTUN_IOC, "iptun", 0, NULL, 0},
+ {IBPART_IOC, "ibp", -1, NULL, 0}
};
#define DLDIOC_CNT \
(sizeof (dld_ioc_modtable) / sizeof (dld_ioc_modentry_t))
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -63,44 +62,110 @@
#include <sys/ib/mgt/ibmf/ibmf.h> /* for ibd_get_portspeed */
-/*
- * Per-interface tunables (for developers)
+#include <sys/priv_names.h>
+#include <sys/dls.h>
+#include <sys/dld_ioc.h>
+#include <sys/policy.h>
+#include <sys/ibpart.h>
+#include <sys/file.h>
+
+/*
+ * The write-up below includes details on the following:
+ * 1. The dladm administrative model.
+ * 2. Late HCA initialization feature.
+ * 3. Brussels support and its implications to the current architecture.
+ *
+ * 1. The dladm administrative model.
+ * ------------------------------------------
+ * With the dladm model, ibnex will create one ibd instance per port. These
+ * instances will be created independent of the port state.
+ *
+ * The ibd driver is two faceted: One side of it working as the port driver and
+ * the other as the partition object driver.
*
- * ibd_tx_copy_thresh
- * This sets the threshold at which ibd will attempt to do a bcopy of the
- * outgoing data into a pre-mapped buffer. The IPoIB driver's send behavior
- * is restricted by various parameters, so setting of this value must be
- * made after careful considerations only. For instance, IB HCAs currently
- * impose a relatively small limit (when compared to ethernet NICs) on the
- * length of the SGL for transmit. On the other hand, the ip stack could
- * send down mp chains that are quite long when LSO is enabled.
+ * The port instance is a child of the HCA, and will have an entry in the devfs.
+ * A DDI attach only happens for the port driver, and its attach is
+ * handled in ibd_port_attach(). Similarly, a DDI detach for the port driver is
+ * handled in ibd_port_unattach().
+ *
+ * The partition object is only a registrant to the mac layer via mac_register()
+ * and does not have an entry in the device tree. There is no DDI softstate
+ * managed by the DDI framework for the partition objects. However, the state is
+ * managed inside the ibd driver, and every partition object hangs off the
+ * "ibd_objlist_head".
+ *
+ * The partition object first comes into existence when a user runs the
+ * 'create-part' subcommand of dladm. This is like invoking the attach entry
+ * point of the partition object. The partition object goes away with the
+ * 'delete-part' subcommand of dladm. This is like invoking the detach entry
+ * point of the partition object.
+ *
+ * The create-part and delete-part subcommands result in dld ioctls that end up
+ * calling ibd_create_partition() and ibd_delete_partition() respectively.
+ * These ioctls are registered with the dld layer in _init() via a call to
+ * dld_ioc_register().
+ *
+ * The port instance by itself cannot be plumbed. It is only the partition
+ * objects that can be plumbed and they alone participate in I/O and not the
+ * port driver.
+ *
+ * There are some info ioctls supported in ibd which are used by dladm(1M) to
+ * display useful information. The info entry point for ibd is
+ * ibd_get_partition_info().
*
- * ibd_num_swqe
- * Number of "send WQE" elements that will be allocated and used by ibd.
- * When tuning this parameter, the size of pre-allocated, pre-mapped copy
- * buffer in each of these send wqes must be taken into account. This
- * copy buffer size is determined by the value of IBD_TX_BUF_SZ (this is
- * currently set to the same value of ibd_tx_copy_thresh, but may be
- * changed independently if needed).
+ * 2. Late HCA initialization feature.
+ * ------------------------------------
+ * As mentioned in section 1, the user creates the partition objects via
+ * dladm(1M). It is possible that:
+ * a) The physical port itself is down and the SM cannot be reached.
+ * b) The PKEY specified by the user has not been created in the SM yet.
+ * c) An IPoIB broadcast group for the specified PKEY is not present.
*
- * ibd_num_rwqe
- * Number of "receive WQE" elements that will be allocated and used by
- * ibd. This parameter is limited by the maximum channel size of the HCA.
- * Each buffer in the receive wqe will be of MTU size.
+ * In all of the above cases, complete initialization of the partition object is
+ * not possible. However, the new model allows the creation of partition
+ * objects even in such cases but will defer the initialization for later.
+ * When such a partition object is plumbed, the link state will be displayed as
+ * "down".
+ * The driver, at this point, is listening to events that herald the
+ * availability of resources -
+ * i) LINK_UP when the link becomes available
+ * ii) PORT_CHANGE when the PKEY has been created
+ * iii) MCG_CREATED when the IPoIB broadcast group for the given pkey has been
+ * created
+ * via ibd_async_handler() for events i) and ii), and via
+ * ibd_snet_notices_handler() for iii.
+ * The driver handles these events (as and when they arrive) and completes the
+ * initialization of the partition object and transitions it to a usable state.
*
- * ibd_num_lso_bufs
- * Number of "larger-than-MTU" copy buffers to use for cases when the
- * outgoing mblk chain is too fragmented to be used with ibt_map_mem_iov()
- * and too large to be used with regular MTU-sized copy buffers. It is
- * not recommended to tune this variable without understanding the
- * application environment and/or memory resources. The size of each of
- * these lso buffers is determined by the value of IBD_LSO_BUFSZ.
+ * 3. Brussels support and its implications to the current architecture.
+ * ---------------------------------------------------------------------
+ * The brussels support introduces two new interfaces to the ibd driver -
+ * ibd_m_getprop() and ibd_m_setprop().
+ * These interfaces allow setting and retrieval of certain properties.
+ * Some of them are public properties while most other are private properties
+ * meant to be used by developers. Tuning the latter kind can cause
+ * performance issues and should not be used without understanding the
+ * implications. All properties are specific to an instance of either the
+ * partition object or the port driver.
+ *
+ * The public properties are : mtu and linkmode.
+ * mtu is a read-only property.
+ * linkmode can take two values - UD and CM.
*
- * ibd_num_ah
- * Number of AH cache entries to allocate
- *
- * ibd_hash_size
- * Hash table size for the active AH list
+ * Changing the linkmode requires some bookkeeping in the driver. The
+ * capabilities need to be re-reported to the mac layer. This is done by
+ * calling mac_capab_update(). The maxsdu is updated by calling
+ * mac_maxsdu_update().
+ * The private properties retain their values across the change of linkmode.
+ * NOTE:
+ * - The port driver does not support any property apart from mtu.
+ * - All other properties are only meant for the partition object.
+ * - The properties cannot be set when an instance is plumbed. The
+ * instance has to be unplumbed to effect any setting.
+ */
+
+/*
+ * Driver wide tunables
*
* ibd_tx_softintr
* ibd_rx_softintr
@@ -113,49 +178,17 @@
* allocated and logging is enabled only when IBD_LOGGING is defined.
*
*/
-uint_t ibd_tx_copy_thresh = 0x1000;
-uint_t ibd_num_swqe = 4000;
-uint_t ibd_num_rwqe = 4000;
-uint_t ibd_num_lso_bufs = 0x400;
-uint_t ibd_num_ah = 256;
-uint_t ibd_hash_size = 32;
uint_t ibd_rx_softintr = 1;
uint_t ibd_tx_softintr = 1;
-uint_t ibd_create_broadcast_group = 1;
+
#ifdef IBD_LOGGING
uint_t ibd_log_sz = 0x20000;
#endif
-#define IBD_TX_COPY_THRESH ibd_tx_copy_thresh
-#define IBD_TX_BUF_SZ ibd_tx_copy_thresh
-#define IBD_NUM_SWQE ibd_num_swqe
-#define IBD_NUM_RWQE ibd_num_rwqe
-#define IBD_NUM_LSO_BUFS ibd_num_lso_bufs
-#define IBD_NUM_AH ibd_num_ah
-#define IBD_HASH_SIZE ibd_hash_size
#ifdef IBD_LOGGING
#define IBD_LOG_SZ ibd_log_sz
#endif
-/*
- * ibd_rc_tx_copy_thresh
- * This sets the threshold upto which ibd will attempt to do a bcopy of the
- * outgoing data into a pre-mapped buffer.
- */
-uint_t ibd_rc_tx_copy_thresh = 0x1000;
-
-/*
- * Receive CQ moderation parameters: tunable (for developers)
- */
-uint_t ibd_rxcomp_count = 4;
-uint_t ibd_rxcomp_usec = 10;
-
-/*
- * Send CQ moderation parameters: tunable (for developers)
- */
-uint_t ibd_txcomp_count = 16;
-uint_t ibd_txcomp_usec = 300;
-
/* Post IBD_RX_POST_CNT receive work requests at a time. */
#define IBD_RX_POST_CNT 8
@@ -170,7 +203,6 @@
*/
#define IBD_LSO_MAXLEN 65536
#define IBD_LSO_BUFSZ 8192
-#define IBD_PROP_LSO_POLICY "lso-policy"
/*
* Async operation states
@@ -184,32 +216,35 @@
/*
* State of IBD driver initialization during attach/m_start
*/
-#define IBD_DRV_STATE_INITIALIZED 0x00001
-#define IBD_DRV_RXINTR_ADDED 0x00002
-#define IBD_DRV_TXINTR_ADDED 0x00004
-#define IBD_DRV_IBTL_ATTACH_DONE 0x00008
-#define IBD_DRV_HCA_OPENED 0x00010
-#define IBD_DRV_PD_ALLOCD 0x00020
-#define IBD_DRV_MAC_REGISTERED 0x00040
-#define IBD_DRV_PORT_DETAILS_OBTAINED 0x00080
-#define IBD_DRV_BCAST_GROUP_FOUND 0x00100
-#define IBD_DRV_ACACHE_INITIALIZED 0x00200
-#define IBD_DRV_CQS_ALLOCD 0x00400
-#define IBD_DRV_UD_CHANNEL_SETUP 0x00800
-#define IBD_DRV_TXLIST_ALLOCD 0x01000
-#define IBD_DRV_SCQ_NOTIFY_ENABLED 0x02000
-#define IBD_DRV_RXLIST_ALLOCD 0x04000
-#define IBD_DRV_BCAST_GROUP_JOINED 0x08000
-#define IBD_DRV_ASYNC_THR_CREATED 0x10000
-#define IBD_DRV_RCQ_NOTIFY_ENABLED 0x20000
-#define IBD_DRV_SM_NOTICES_REGISTERED 0x40000
-#define IBD_DRV_STARTED 0x80000
+#define IBD_DRV_STATE_INITIALIZED 0x000001
+#define IBD_DRV_RXINTR_ADDED 0x000002
+#define IBD_DRV_TXINTR_ADDED 0x000004
+#define IBD_DRV_IBTL_ATTACH_DONE 0x000008
+#define IBD_DRV_HCA_OPENED 0x000010
+#define IBD_DRV_PD_ALLOCD 0x000020
+#define IBD_DRV_MAC_REGISTERED 0x000040
+#define IBD_DRV_PORT_DETAILS_OBTAINED 0x000080
+#define IBD_DRV_BCAST_GROUP_FOUND 0x000100
+#define IBD_DRV_ACACHE_INITIALIZED 0x000200
+#define IBD_DRV_CQS_ALLOCD 0x000400
+#define IBD_DRV_UD_CHANNEL_SETUP 0x000800
+#define IBD_DRV_TXLIST_ALLOCD 0x001000
+#define IBD_DRV_SCQ_NOTIFY_ENABLED 0x002000
+#define IBD_DRV_RXLIST_ALLOCD 0x004000
+#define IBD_DRV_BCAST_GROUP_JOINED 0x008000
+#define IBD_DRV_ASYNC_THR_CREATED 0x010000
+#define IBD_DRV_RCQ_NOTIFY_ENABLED 0x020000
+#define IBD_DRV_SM_NOTICES_REGISTERED 0x040000
+#define IBD_DRV_STARTED 0x080000
#define IBD_DRV_RC_SRQ_ALLOCD 0x100000
#define IBD_DRV_RC_LARGEBUF_ALLOCD 0x200000
#define IBD_DRV_RC_LISTEN 0x400000
#ifdef DEBUG
#define IBD_DRV_RC_PRIVATE_STATE 0x800000
#endif
+#define IBD_DRV_IN_DELETION 0x1000000
+#define IBD_DRV_IN_LATE_HCA_INIT 0x2000000
+#define IBD_DRV_REQ_LIST_INITED 0x4000000
/*
* Start/stop in-progress flags; note that restart must always remain
@@ -218,12 +253,16 @@
#define IBD_DRV_START_IN_PROGRESS 0x10000000
#define IBD_DRV_STOP_IN_PROGRESS 0x20000000
#define IBD_DRV_RESTART_IN_PROGRESS 0x30000000
+#define IBD_DRV_DELETE_IN_PROGRESS IBD_DRV_RESTART_IN_PROGRESS
/*
* Miscellaneous constants
*/
#define IB_MGID_IPV4_LOWGRP_MASK 0xFFFFFFFF
#define IBD_DEF_MAX_SDU 2044
+#define IBD_DEF_MAX_MTU (IBD_DEF_MAX_SDU + IPOIB_HDRSIZE)
+#define IBD_DEF_RC_MAX_SDU 65520
+#define IBD_DEF_RC_MAX_MTU (IBD_DEF_RC_MAX_SDU + IPOIB_HDRSIZE)
#define IBD_DEFAULT_QKEY 0xB1B
#ifdef IBD_LOGGING
#define IBD_DMAX_LINE 100
@@ -249,6 +288,12 @@
ibd_global_state_t ibd_gstate;
/*
+ * Partition object list
+ */
+ibd_state_t *ibd_objlist_head = NULL;
+kmutex_t ibd_objlist_lock;
+
+/*
* Logging
*/
#ifdef IBD_LOGGING
@@ -275,6 +320,15 @@
static mblk_t *ibd_m_tx(void *, mblk_t *);
static boolean_t ibd_m_getcapab(void *, mac_capab_t, void *);
+static int ibd_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
+ const void *);
+static int ibd_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
+static void ibd_m_propinfo(void *, const char *, mac_prop_id_t,
+ mac_prop_info_handle_t);
+static int ibd_set_priv_prop(ibd_state_t *, const char *, uint_t,
+ const void *);
+static int ibd_get_priv_prop(ibd_state_t *, const char *, uint_t, void *);
+
/*
* Private driver entry points for GLDv3
*/
@@ -339,6 +393,8 @@
static void ibd_freemsg_cb(char *);
static void ibd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
ibt_async_event_t *);
+static void ibdpd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
+ ibt_async_event_t *);
static void ibd_snet_notices_handler(void *, ib_gid_t,
ibt_subnet_event_code_t, ibt_subnet_event_t *);
@@ -393,8 +449,7 @@
* Helpers for attach/start routines
*/
static int ibd_register_mac(ibd_state_t *, dev_info_t *);
-static int ibd_record_capab(ibd_state_t *, dev_info_t *);
-static int ibd_unattach(ibd_state_t *, dev_info_t *);
+static int ibd_record_capab(ibd_state_t *);
static int ibd_get_port_details(ibd_state_t *);
static int ibd_alloc_cqs(ibd_state_t *);
static int ibd_setup_ud_channel(ibd_state_t *);
@@ -402,6 +457,11 @@
static int ibd_undo_start(ibd_state_t *, link_state_t);
static void ibd_set_mac_progress(ibd_state_t *, uint_t);
static void ibd_clr_mac_progress(ibd_state_t *, uint_t);
+static int ibd_part_attach(ibd_state_t *state, dev_info_t *dip);
+static int ibd_part_unattach(ibd_state_t *state);
+static int ibd_port_attach(dev_info_t *);
+static int ibd_port_unattach(ibd_state_t *state, dev_info_t *dip);
+static int ibd_get_port_state(ibd_state_t *, link_state_t *);
/*
@@ -414,6 +474,10 @@
static void *list_get_head(list_t *);
static int ibd_hash_key_cmp(mod_hash_key_t, mod_hash_key_t);
static uint_t ibd_hash_by_id(void *, mod_hash_key_t);
+
+ibt_status_t ibd_get_part_attr(datalink_id_t, ibt_part_attr_t *);
+ibt_status_t ibd_get_all_part_attr(ibt_part_attr_t **, int *);
+
#ifdef IBD_LOGGING
static void ibd_log(const char *, ...);
#endif
@@ -441,13 +505,23 @@
IBT_NETWORK,
ibd_async_handler,
NULL,
+ "IBPART"
+};
+
+static struct ibt_clnt_modinfo_s ibdpd_clnt_modinfo = {
+ IBTI_V_CURR,
+ IBT_NETWORK,
+ ibdpd_async_handler,
+ NULL,
"IPIB"
};
/*
* GLDv3 entry points
*/
-#define IBD_M_CALLBACK_FLAGS (MC_GETCAPAB)
+#define IBD_M_CALLBACK_FLAGS \
+ (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
+
static mac_callbacks_t ibd_m_callbacks = {
IBD_M_CALLBACK_FLAGS,
ibd_m_stat,
@@ -459,7 +533,55 @@
ibd_m_tx,
NULL,
NULL,
- ibd_m_getcapab
+ ibd_m_getcapab,
+ NULL,
+ NULL,
+ ibd_m_setprop,
+ ibd_m_getprop,
+ ibd_m_propinfo
+};
+
+/* Private properties */
+char *ibd_priv_props[] = {
+ "_ibd_broadcast_group",
+ "_ibd_coalesce_completions",
+ "_ibd_create_broadcast_group",
+ "_ibd_hash_size",
+ "_ibd_lso_enable",
+ "_ibd_num_ah",
+ "_ibd_num_lso_bufs",
+ "_ibd_rc_enable_srq",
+ "_ibd_rc_num_rwqe",
+ "_ibd_rc_num_srq",
+ "_ibd_rc_num_swqe",
+ "_ibd_rc_rx_comp_count",
+ "_ibd_rc_rx_comp_usec",
+ "_ibd_rc_rx_copy_thresh",
+ "_ibd_rc_rx_rwqe_thresh",
+ "_ibd_rc_tx_comp_count",
+ "_ibd_rc_tx_comp_usec",
+ "_ibd_rc_tx_copy_thresh",
+ "_ibd_ud_num_rwqe",
+ "_ibd_ud_num_swqe",
+ "_ibd_ud_rx_comp_count",
+ "_ibd_ud_rx_comp_usec",
+ "_ibd_ud_tx_comp_count",
+ "_ibd_ud_tx_comp_usec",
+ "_ibd_ud_tx_copy_thresh",
+ NULL
+};
+
+static int ibd_create_partition(void *, intptr_t, int, cred_t *, int *);
+static int ibd_delete_partition(void *, intptr_t, int, cred_t *, int *);
+static int ibd_get_partition_info(void *, intptr_t, int, cred_t *, int *);
+
+static dld_ioc_info_t ibd_dld_ioctl_list[] = {
+ {IBD_CREATE_IBPART, DLDCOPYINOUT, sizeof (ibpart_ioctl_t),
+ ibd_create_partition, secpolicy_dl_config},
+ {IBD_DELETE_IBPART, DLDCOPYIN, sizeof (ibpart_ioctl_t),
+ ibd_delete_partition, secpolicy_dl_config},
+ {IBD_INFO_IBPART, DLDCOPYIN, sizeof (ibd_ioctl_t),
+ ibd_get_partition_info, NULL}
};
/*
@@ -701,8 +823,8 @@
ibd_state_s::id_bgroup_created
ibd_state_s::id_mac_state
ibd_state_s::id_mtu
- ibd_state_s::id_num_rwqe
- ibd_state_s::id_num_swqe
+ ibd_state_s::id_ud_num_rwqe
+ ibd_state_s::id_ud_num_swqe
ibd_state_s::id_qpnum
ibd_state_s::id_rcq_hdl
ibd_state_s::id_rx_buf_sz
@@ -885,7 +1007,9 @@
return (status);
}
- mac_init_ops(&ibd_dev_ops, "ibd");
+ mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
+
+ mac_init_ops(&ibd_dev_ops, "ibp");
status = mod_install(&ibd_modlinkage);
if (status != 0) {
DPRINT(10, "_init:failed in mod_install()");
@@ -901,6 +1025,13 @@
ibd_gstate.ig_service_list = NULL;
mutex_exit(&ibd_gstate.ig_mutex);
+ if (dld_ioc_register(IBPART_IOC, ibd_dld_ioctl_list,
+ DLDIOCCNT(ibd_dld_ioctl_list)) != 0) {
+ return (EIO);
+ }
+
+ ibt_register_part_attr_cb(ibd_get_part_attr, ibd_get_all_part_attr);
+
#ifdef IBD_LOGGING
ibd_log_init();
#endif
@@ -922,7 +1053,10 @@
if (status != 0)
return (status);
+ ibt_unregister_part_attr_cb();
+
mac_fini_ops(&ibd_dev_ops);
+ mutex_destroy(&ibd_objlist_lock);
ddi_soft_state_fini(&ibd_list);
mutex_destroy(&ibd_gstate.ig_mutex);
#ifdef IBD_LOGGING
@@ -1091,6 +1225,17 @@
mutex_exit(&state->id_acache_req_lock);
/*
+ * If we are in late hca initialization mode, do not
+ * process any async request other than TRAP. TRAP
+ * is used for indicating creation of a broadcast group;
+ * in which case, we need to join/create the group.
+ */
+ if ((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
+ (ptr->rq_op != IBD_ASYNC_TRAP)) {
+ goto free_req_and_continue;
+ }
+
+ /*
* Once we have done the operation, there is no
* guarantee the request slot is going to be valid,
* it might be freed up (as in IBD_ASYNC_LEAVE, REAP,
@@ -1152,6 +1297,7 @@
ibd_async_rc_recycle_ace(state, ptr);
break;
}
+free_req_and_continue:
if (ptr != NULL)
kmem_cache_free(state->id_req_kmc, ptr);
@@ -1252,9 +1398,6 @@
ibd_ace_t *ce;
int i;
- mutex_init(&state->id_acache_req_lock, NULL, MUTEX_DRIVER, NULL);
- cv_init(&state->id_acache_req_cv, NULL, CV_DEFAULT, NULL);
-
mutex_init(&state->id_ac_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_mc_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_enter(&state->id_ac_mutex);
@@ -1263,19 +1406,17 @@
list_create(&state->id_ah_active, sizeof (ibd_ace_t),
offsetof(ibd_ace_t, ac_list));
state->id_ah_active_hash = mod_hash_create_extended("IBD AH hash",
- IBD_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor,
+ state->id_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
ibd_hash_by_id, NULL, ibd_hash_key_cmp, KM_SLEEP);
list_create(&state->id_mc_full, sizeof (ibd_mce_t),
offsetof(ibd_mce_t, mc_list));
list_create(&state->id_mc_non, sizeof (ibd_mce_t),
offsetof(ibd_mce_t, mc_list));
- list_create(&state->id_req_list, sizeof (ibd_req_t),
- offsetof(ibd_req_t, rq_list));
state->id_ac_hot_ace = NULL;
state->id_ac_list = ce = (ibd_ace_t *)kmem_zalloc(sizeof (ibd_ace_t) *
- IBD_NUM_AH, KM_SLEEP);
- for (i = 0; i < IBD_NUM_AH; i++, ce++) {
+ state->id_num_ah, KM_SLEEP);
+ for (i = 0; i < state->id_num_ah; i++, ce++) {
if (ibt_alloc_ud_dest(state->id_hca_hdl, IBT_UD_DEST_NO_FLAGS,
state->id_pd_hdl, &ce->ac_dest) != IBT_SUCCESS) {
mutex_exit(&state->id_ac_mutex);
@@ -1316,13 +1457,10 @@
list_destroy(&state->id_ah_active);
list_destroy(&state->id_mc_full);
list_destroy(&state->id_mc_non);
- list_destroy(&state->id_req_list);
- kmem_free(state->id_ac_list, sizeof (ibd_ace_t) * IBD_NUM_AH);
+ kmem_free(state->id_ac_list, sizeof (ibd_ace_t) * state->id_num_ah);
mutex_exit(&state->id_ac_mutex);
mutex_destroy(&state->id_ac_mutex);
mutex_destroy(&state->id_mc_mutex);
- mutex_destroy(&state->id_acache_req_lock);
- cv_destroy(&state->id_acache_req_cv);
}
/*
@@ -1960,6 +2098,12 @@
}
/*
+ * Late HCA Initialization:
+ * If plumb had succeeded without the availability of an active port or the
+ * pkey, and either of their availability is now being indicated via PORT_UP
+ * or PORT_CHANGE respectively, try a start of the interface.
+ *
+ * Normal Operation:
* When the link is notified up, we need to do a few things, based
* on the port's current p_init_type_reply claiming a reinit has been
* done or not. The reinit steps are:
@@ -1995,8 +2139,8 @@
mutex_enter(&state->id_link_mutex);
/*
- * If the init code in ibd_m_start hasn't yet set up the
- * pkey/gid, nothing to do; that code will set the link state.
+ * If the link state is unknown, a plumb has not yet been attempted
+ * on the interface. Nothing to do.
*/
if (state->id_link_state == LINK_STATE_UNKNOWN) {
mutex_exit(&state->id_link_mutex);
@@ -2004,6 +2148,17 @@
}
/*
+ * If link state is down because of plumb failure, and we are not in
+ * late HCA init, and we were not successfully plumbed, nothing to do.
+ */
+ if ((state->id_link_state == LINK_STATE_DOWN) &&
+ ((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) == 0) &&
+ ((state->id_mac_state & IBD_DRV_STARTED) == 0)) {
+ mutex_exit(&state->id_link_mutex);
+ goto link_mod_return;
+ }
+
+ /*
* If this routine was called in response to a port down event,
* we just need to see if this should be informed.
*/
@@ -2028,6 +2183,21 @@
}
/*
+ * If in the previous attempt, the pkey was not found either due to the
+ * port state being down, or due to its absence in the pkey table,
+ * look for it now and try to start the interface.
+ */
+ if (state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) {
+ mutex_exit(&state->id_link_mutex);
+ if ((ret = ibd_start(state)) != 0) {
+ DPRINT(10, "ibd_linkmod: cannot start from late HCA "
+ "init, ret=%d", ret);
+ }
+ ibt_free_portinfo(port_infop, port_infosz);
+ goto link_mod_return;
+ }
+
+ /*
* Check the SM InitTypeReply flags. If both NoLoadReply and
* PreserveContentReply are 0, we don't know anything about the
* data loaded into the port attributes, so we need to verify
@@ -2234,11 +2404,14 @@
macp->m_src_addr = (uint8_t *)&state->id_macaddr;
macp->m_callbacks = &ibd_m_callbacks;
macp->m_min_sdu = 0;
- if (state->id_enable_rc) {
+ if (state->id_type == IBD_PORT_DRIVER) {
+ macp->m_max_sdu = IBD_DEF_RC_MAX_SDU;
+ } else if (state->id_enable_rc) {
macp->m_max_sdu = state->rc_mtu - IPOIB_HDRSIZE;
} else {
macp->m_max_sdu = IBD_DEF_MAX_SDU;
}
+ macp->m_priv_props = ibd_priv_props;
/*
* Register ourselves with the GLDv3 interface
@@ -2255,7 +2428,7 @@
}
static int
-ibd_record_capab(ibd_state_t *state, dev_info_t *dip)
+ibd_record_capab(ibd_state_t *state)
{
ibt_hca_attr_t hca_attrs;
ibt_status_t ibt_status;
@@ -2285,17 +2458,9 @@
* 2. Set LSO policy, capability and maximum length
*/
if (state->id_enable_rc) {
- state->id_lso_policy = B_FALSE;
state->id_lso_capable = B_FALSE;
state->id_lso_maxlen = 0;
} else {
- if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS
- |DDI_PROP_NOTPROM, IBD_PROP_LSO_POLICY, 1)) {
- state->id_lso_policy = B_TRUE;
- } else {
- state->id_lso_policy = B_FALSE;
- }
-
if (hca_attrs.hca_max_lso_size > 0) {
state->id_lso_capable = B_TRUE;
if (hca_attrs.hca_max_lso_size > IBD_LSO_MAXLEN)
@@ -2356,28 +2521,30 @@
/*
* 5. Set number of recv and send wqes after checking hca maximum
- * channel size
- */
- if (hca_attrs.hca_max_chan_sz < IBD_NUM_RWQE) {
- state->id_num_rwqe = hca_attrs.hca_max_chan_sz;
- } else {
- state->id_num_rwqe = IBD_NUM_RWQE;
- }
- state->id_rx_bufs_outstanding_limit = state->id_num_rwqe - IBD_RWQE_MIN;
- if (hca_attrs.hca_max_chan_sz < IBD_NUM_SWQE) {
- state->id_num_swqe = hca_attrs.hca_max_chan_sz;
- } else {
- state->id_num_swqe = IBD_NUM_SWQE;
- }
+ * channel size. Store the max channel size in the state so that it
+ * can be referred to when the swqe/rwqe change is requested via
+ * dladm.
+ */
+
+ state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
+
+ if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
+ state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
+
+ state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
+ IBD_RWQE_MIN;
+
+ if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
+ state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
+
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
return (DDI_SUCCESS);
}
static int
-ibd_unattach(ibd_state_t *state, dev_info_t *dip)
-{
- int instance;
+ibd_part_unattach(ibd_state_t *state)
+{
uint32_t progress = state->id_mac_state;
ibt_status_t ret;
@@ -2405,6 +2572,33 @@
state->id_mac_state &= (~IBD_DRV_MAC_REGISTERED);
}
+ if (progress & IBD_DRV_ASYNC_THR_CREATED) {
+ /*
+ * No new async requests will be posted since the device
+ * link state has been marked as unknown; completion handlers
+ * have been turned off, so Tx handler will not cause any
+ * more IBD_ASYNC_REAP requests.
+ *
+ * Queue a request for the async thread to exit, which will
+ * be serviced after any pending ones. This can take a while,
+ * especially if the SM is unreachable, since IBMF will slowly
+ * timeout each SM request issued by the async thread. Reap
+ * the thread before continuing on, we do not want it to be
+ * lingering in modunloaded code.
+ */
+ ibd_queue_work_slot(state, &state->id_ah_req, IBD_ASYNC_EXIT);
+ thread_join(state->id_async_thrid);
+
+ state->id_mac_state &= (~IBD_DRV_ASYNC_THR_CREATED);
+ }
+
+ if (progress & IBD_DRV_REQ_LIST_INITED) {
+ list_destroy(&state->id_req_list);
+ mutex_destroy(&state->id_acache_req_lock);
+ cv_destroy(&state->id_acache_req_cv);
+ state->id_mac_state &= ~IBD_DRV_REQ_LIST_INITED;
+ }
+
if (progress & IBD_DRV_PD_ALLOCD) {
if ((ret = ibt_free_pd(state->id_hca_hdl,
state->id_pd_hdl)) != IBT_SUCCESS) {
@@ -2471,44 +2665,22 @@
state->id_mac_state &= (~IBD_DRV_STATE_INITIALIZED);
}
- instance = ddi_get_instance(dip);
- ddi_soft_state_free(ibd_list, instance);
-
return (DDI_SUCCESS);
}
-/*
- * Attach device to the IO framework.
- */
-static int
-ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
-{
- ibd_state_t *state = NULL;
- ib_guid_t hca_guid;
- int instance;
+int
+ibd_part_attach(ibd_state_t *state, dev_info_t *dip)
+{
ibt_status_t ret;
int rv;
-
- /*
- * IBD doesn't support suspend/resume
- */
- if (cmd != DDI_ATTACH)
- return (DDI_FAILURE);
-
- /*
- * Allocate softstate structure
- */
- instance = ddi_get_instance(dip);
- if (ddi_soft_state_zalloc(ibd_list, instance) == DDI_FAILURE)
- return (DDI_FAILURE);
- state = ddi_get_soft_state(ibd_list, instance);
+ kthread_t *kht;
/*
* Initialize mutexes and condition variables
*/
if (ibd_state_init(state, dip) != DDI_SUCCESS) {
DPRINT(10, "ibd_attach: failed in ibd_state_init()");
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_STATE_INITIALIZED;
@@ -2520,7 +2692,7 @@
NULL, NULL, ibd_intr, (caddr_t)state)) != DDI_SUCCESS) {
DPRINT(10, "ibd_attach: failed in "
"ddi_add_softintr(id_rx), ret=%d", rv);
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_RXINTR_ADDED;
}
@@ -2530,37 +2702,12 @@
(caddr_t)state)) != DDI_SUCCESS) {
DPRINT(10, "ibd_attach: failed in "
"ddi_add_softintr(id_tx), ret=%d", rv);
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_TXINTR_ADDED;
}
/*
- * Obtain IBA P_Key, port number and HCA guid and validate
- * them (for P_Key, only full members are allowed as per
- * IPoIB specification; neither port number nor HCA guid
- * can be zero)
- */
- if ((state->id_pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
- "port-pkey", IB_PKEY_INVALID_LIMITED)) <= IB_PKEY_INVALID_FULL) {
- DPRINT(10, "ibd_attach: port device has wrong partition (0x%x)",
- state->id_pkey);
- goto attach_fail;
- }
- if ((state->id_port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
- "port-number", 0)) == 0) {
- DPRINT(10, "ibd_attach: invalid port number (%d)",
- state->id_port);
- goto attach_fail;
- }
- if ((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
- "hca-guid", 0)) == 0) {
- DPRINT(10, "ibd_attach: port hca has invalid guid (0x%llx)",
- hca_guid);
- goto attach_fail;
- }
-
- /*
* Attach to IBTL
*/
mutex_enter(&ibd_gstate.ig_mutex);
@@ -2570,15 +2717,14 @@
DPRINT(10, "ibd_attach: global: failed in "
"ibt_attach(), ret=%d", ret);
mutex_exit(&ibd_gstate.ig_mutex);
- goto attach_fail;
+ return (DDI_FAILURE);
}
}
if ((ret = ibt_attach(&ibd_clnt_modinfo, dip, state,
&state->id_ibt_hdl)) != IBT_SUCCESS) {
- DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d",
- ret);
+ DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d", ret);
mutex_exit(&ibd_gstate.ig_mutex);
- goto attach_fail;
+ return (DDI_FAILURE);
}
ibd_gstate.ig_ibt_hdl_ref_cnt++;
mutex_exit(&ibd_gstate.ig_mutex);
@@ -2587,22 +2733,19 @@
/*
* Open the HCA
*/
- if ((ret = ibt_open_hca(state->id_ibt_hdl, hca_guid,
+ if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
&state->id_hca_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_attach: ibt_open_hca() failed, ret=%d", ret);
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_HCA_OPENED;
- /* Get RC config before ibd_record_capab */
- ibd_rc_get_conf(state);
-
#ifdef DEBUG
/* Initialize Driver Counters for Reliable Connected Mode */
if (state->id_enable_rc) {
if (ibd_rc_init_stats(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_attach: failed in ibd_rc_init_stats");
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_RC_PRIVATE_STATE;
}
@@ -2611,7 +2754,7 @@
/*
* Record capabilities
*/
- (void) ibd_record_capab(state, dip);
+ (void) ibd_record_capab(state);
/*
* Allocate a protection domain on the HCA
@@ -2619,32 +2762,49 @@
if ((ret = ibt_alloc_pd(state->id_hca_hdl, IBT_PD_NO_FLAGS,
&state->id_pd_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_attach: ibt_alloc_pd() failed, ret=%d", ret);
- goto attach_fail;
+ return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_PD_ALLOCD;
/*
- * Register ibd interfaces with the Nemo framework
- */
- if (ibd_register_mac(state, dip) != IBT_SUCCESS) {
- DPRINT(10, "ibd_attach: failed in ibd_register_mac()");
- goto attach_fail;
- }
- state->id_mac_state |= IBD_DRV_MAC_REGISTERED;
-
- /*
- * We're done with everything we could to make the attach
- * succeed. All the buffer allocations and IPoIB broadcast
- * group joins are deferred to when the interface instance
- * is actually plumbed to avoid wasting memory.
- */
+ * We need to initialise the req_list that is required for the
+ * operation of the async_thread.
+ */
+ mutex_init(&state->id_acache_req_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&state->id_acache_req_cv, NULL, CV_DEFAULT, NULL);
+ list_create(&state->id_req_list, sizeof (ibd_req_t),
+ offsetof(ibd_req_t, rq_list));
+ state->id_mac_state |= IBD_DRV_REQ_LIST_INITED;
+
+ /*
+ * Create the async thread; thread_create never fails.
+ */
+ kht = thread_create(NULL, 0, ibd_async_work, state, 0, &p0,
+ TS_RUN, minclsyspri);
+ state->id_async_thrid = kht->t_did;
+ state->id_mac_state |= IBD_DRV_ASYNC_THR_CREATED;
+
return (DDI_SUCCESS);
-
-attach_fail:
- (void) ibd_unattach(state, dip);
- _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
- return (DDI_FAILURE);
+}
+
+/*
+ * Attach device to the IO framework.
+ */
+static int
+ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int ret;
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ ret = ibd_port_attach(dip);
+ break;
+ default:
+ ret = DDI_FAILURE;
+ break;
+ }
+ return (ret);
}
/*
@@ -2673,7 +2833,7 @@
* done ibd_attach(), ibd_m_start() and ibd_m_stop() correctly
* so far, we should find all the flags we need in id_mac_state.
*/
- return (ibd_unattach(state, dip));
+ return (ibd_port_unattach(state, dip));
}
/*
@@ -2708,13 +2868,11 @@
state->id_rx_list.dl_cnt = 0;
mutex_init(&state->id_rx_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
- (void) sprintf(buf, "ibd_req%d", ddi_get_instance(dip));
+ (void) sprintf(buf, "ibd_req%d_%x", ddi_get_instance(dip),
+ state->id_pkey);
state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
- mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
- cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);
-
/* For Reliable Connected Mode */
mutex_init(&state->rc_rx_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->rc_tx_large_bufs_lock, NULL, MUTEX_DRIVER, NULL);
@@ -2723,6 +2881,41 @@
mutex_init(&state->rc_pass_chan_list.chan_list_mutex, NULL,
MUTEX_DRIVER, NULL);
+ /*
+ * Make RC the default link mode. If this fails during connection
+ * setup, the link mode is automatically transitioned to UD.
+ * Also set the RC MTU.
+ */
+ state->id_enable_rc = IBD_DEF_LINK_MODE;
+ state->rc_mtu = IBD_DEF_RC_MAX_MTU;
+ state->id_mtu = IBD_DEF_MAX_MTU;
+
+ /* Initialize all tunables to default */
+ state->id_lso_policy = IBD_DEF_LSO_POLICY;
+ state->id_num_lso_bufs = IBD_DEF_NUM_LSO_BUFS;
+ state->id_num_ah = IBD_DEF_NUM_AH;
+ state->id_hash_size = IBD_DEF_HASH_SIZE;
+ state->id_create_broadcast_group = IBD_DEF_CREATE_BCAST_GROUP;
+ state->id_allow_coalesce_comp_tuning = IBD_DEF_COALESCE_COMPLETIONS;
+ state->id_ud_rx_comp_count = IBD_DEF_UD_RX_COMP_COUNT;
+ state->id_ud_rx_comp_usec = IBD_DEF_UD_RX_COMP_USEC;
+ state->id_ud_tx_comp_count = IBD_DEF_UD_TX_COMP_COUNT;
+ state->id_ud_tx_comp_usec = IBD_DEF_UD_TX_COMP_USEC;
+ state->id_rc_rx_comp_count = IBD_DEF_RC_RX_COMP_COUNT;
+ state->id_rc_rx_comp_usec = IBD_DEF_RC_RX_COMP_USEC;
+ state->id_rc_tx_comp_count = IBD_DEF_RC_TX_COMP_COUNT;
+ state->id_rc_tx_comp_usec = IBD_DEF_RC_TX_COMP_USEC;
+ state->id_ud_tx_copy_thresh = IBD_DEF_UD_TX_COPY_THRESH;
+ state->id_rc_rx_copy_thresh = IBD_DEF_RC_RX_COPY_THRESH;
+ state->id_rc_tx_copy_thresh = IBD_DEF_RC_TX_COPY_THRESH;
+ state->id_ud_num_rwqe = IBD_DEF_UD_NUM_RWQE;
+ state->id_ud_num_swqe = IBD_DEF_UD_NUM_SWQE;
+ state->id_rc_num_rwqe = IBD_DEF_RC_NUM_RWQE;
+ state->id_rc_num_swqe = IBD_DEF_RC_NUM_SWQE;
+ state->rc_enable_srq = IBD_DEF_RC_ENABLE_SRQ;
+ state->id_rc_num_srq = IBD_DEF_RC_NUM_SRQ;
+ state->id_rc_rx_rwqe_thresh = IBD_DEF_RC_RX_RWQE_THRESH;
+
return (DDI_SUCCESS);
}
@@ -2732,9 +2925,6 @@
static void
ibd_state_fini(ibd_state_t *state)
{
- cv_destroy(&state->id_macst_cv);
- mutex_destroy(&state->id_macst_lock);
-
kmem_cache_destroy(state->id_req_kmc);
mutex_destroy(&state->id_rx_list.dl_mutex);
@@ -3213,6 +3403,7 @@
ibt_mcg_info_t mcg_info;
state->id_bgroup_created = B_FALSE;
+ state->id_bgroup_present = B_FALSE;
query_bcast_grp:
bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
@@ -3242,7 +3433,7 @@
}
if (!found) {
- if (ibd_create_broadcast_group) {
+ if (state->id_create_broadcast_group) {
/*
* If we created the broadcast group, but failed to
* find it, we can't do anything except leave the
@@ -3300,6 +3491,7 @@
goto find_bgroup_fail;
}
state->id_mtu = mcgmtu;
+ state->id_bgroup_present = B_TRUE;
return (IBT_SUCCESS);
@@ -3323,30 +3515,30 @@
*/
state->id_tx_buf_sz = state->id_mtu;
if (state->id_lso_policy && state->id_lso_capable &&
- (IBD_TX_BUF_SZ > state->id_mtu)) {
- state->id_tx_buf_sz = IBD_TX_BUF_SZ;
- }
-
- state->id_tx_bufs = kmem_zalloc(state->id_num_swqe *
+ (state->id_ud_tx_copy_thresh > state->id_mtu)) {
+ state->id_tx_buf_sz = state->id_ud_tx_copy_thresh;
+ }
+
+ state->id_tx_bufs = kmem_zalloc(state->id_ud_num_swqe *
state->id_tx_buf_sz, KM_SLEEP);
- state->id_tx_wqes = kmem_zalloc(state->id_num_swqe *
+ state->id_tx_wqes = kmem_zalloc(state->id_ud_num_swqe *
sizeof (ibd_swqe_t), KM_SLEEP);
/*
* Do one memory registration on the entire txbuf area
*/
mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_tx_bufs;
- mem_attr.mr_len = state->id_num_swqe * state->id_tx_buf_sz;
+ mem_attr.mr_len = state->id_ud_num_swqe * state->id_tx_buf_sz;
mem_attr.mr_as = NULL;
mem_attr.mr_flags = IBT_MR_SLEEP;
if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
&state->id_tx_mr_hdl, &state->id_tx_mr_desc) != IBT_SUCCESS) {
DPRINT(10, "ibd_alloc_tx_copybufs: ibt_register_mr failed");
kmem_free(state->id_tx_wqes,
- state->id_num_swqe * sizeof (ibd_swqe_t));
+ state->id_ud_num_swqe * sizeof (ibd_swqe_t));
kmem_free(state->id_tx_bufs,
- state->id_num_swqe * state->id_tx_buf_sz);
+ state->id_ud_num_swqe * state->id_tx_buf_sz);
state->id_tx_bufs = NULL;
return (DDI_FAILURE);
}
@@ -3375,7 +3567,7 @@
/*
* Allocate the entire lso memory and register it
*/
- memsz = IBD_NUM_LSO_BUFS * IBD_LSO_BUFSZ;
+ memsz = state->id_num_lso_bufs * IBD_LSO_BUFSZ;
membase = kmem_zalloc(memsz, KM_SLEEP);
mem_attr.mr_vaddr = (uint64_t)(uintptr_t)membase;
@@ -3398,7 +3590,7 @@
* can always derive the address of a buflist entry from the address of
* an lso buffer.
*/
- buflist = kmem_zalloc(IBD_NUM_LSO_BUFS * sizeof (ibd_lsobuf_t),
+ buflist = kmem_zalloc(state->id_num_lso_bufs * sizeof (ibd_lsobuf_t),
KM_SLEEP);
/*
@@ -3406,7 +3598,7 @@
*/
memp = membase;
lbufp = buflist;
- for (i = 0; i < IBD_NUM_LSO_BUFS; i++) {
+ for (i = 0; i < state->id_num_lso_bufs; i++) {
lbufp->lb_isfree = 1;
lbufp->lb_buf = memp;
lbufp->lb_next = lbufp + 1;
@@ -3424,7 +3616,7 @@
bktp->bkt_bufl = buflist;
bktp->bkt_free_head = buflist;
bktp->bkt_mem = membase;
- bktp->bkt_nelem = IBD_NUM_LSO_BUFS;
+ bktp->bkt_nelem = state->id_num_lso_bufs;
bktp->bkt_nfree = bktp->bkt_nelem;
state->id_lso = bktp;
@@ -3450,7 +3642,7 @@
if (state->id_lso_policy && state->id_lso_capable) {
if (ibd_alloc_tx_lsobufs(state) != DDI_SUCCESS)
- state->id_lso_policy = B_FALSE;
+ state->id_lso_capable = B_FALSE;
}
mutex_enter(&state->id_tx_list.dl_mutex);
@@ -3472,7 +3664,7 @@
len = state->id_tx_buf_sz;
swqe = state->id_tx_wqes;
mutex_enter(&state->id_tx_list.dl_mutex);
- for (i = 0; i < state->id_num_swqe; i++, swqe++, bufaddr += len) {
+ for (i = 0; i < state->id_ud_num_swqe; i++, swqe++, bufaddr += len) {
swqe->swqe_next = NULL;
swqe->swqe_im_mblk = NULL;
@@ -3633,8 +3825,10 @@
/*
* Free txbuf memory
*/
- kmem_free(state->id_tx_wqes, state->id_num_swqe * sizeof (ibd_swqe_t));
- kmem_free(state->id_tx_bufs, state->id_num_swqe * state->id_tx_buf_sz);
+ kmem_free(state->id_tx_wqes, state->id_ud_num_swqe *
+ sizeof (ibd_swqe_t));
+ kmem_free(state->id_tx_bufs, state->id_ud_num_swqe *
+ state->id_tx_buf_sz);
state->id_tx_wqes = NULL;
state->id_tx_bufs = NULL;
}
@@ -3828,10 +4022,10 @@
*/
state->id_rx_buf_sz = state->id_mtu + IPOIB_GRH_SIZE;
- state->id_rx_bufs = kmem_zalloc(state->id_num_rwqe *
+ state->id_rx_bufs = kmem_zalloc(state->id_ud_num_rwqe *
state->id_rx_buf_sz, KM_SLEEP);
- state->id_rx_wqes = kmem_zalloc(state->id_num_rwqe *
+ state->id_rx_wqes = kmem_zalloc(state->id_ud_num_rwqe *
sizeof (ibd_rwqe_t), KM_SLEEP);
state->id_rx_nqueues = 1 << IBD_LOG_RX_POST;
@@ -3846,16 +4040,16 @@
* Do one memory registration on the entire rxbuf area
*/
mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_rx_bufs;
- mem_attr.mr_len = state->id_num_rwqe * state->id_rx_buf_sz;
+ mem_attr.mr_len = state->id_ud_num_rwqe * state->id_rx_buf_sz;
mem_attr.mr_as = NULL;
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
&state->id_rx_mr_hdl, &state->id_rx_mr_desc) != IBT_SUCCESS) {
DPRINT(10, "ibd_alloc_rx_copybufs: ibt_register_mr failed");
kmem_free(state->id_rx_wqes,
- state->id_num_rwqe * sizeof (ibd_rwqe_t));
+ state->id_ud_num_rwqe * sizeof (ibd_rwqe_t));
kmem_free(state->id_rx_bufs,
- state->id_num_rwqe * state->id_rx_buf_sz);
+ state->id_ud_num_rwqe * state->id_rx_buf_sz);
state->id_rx_bufs = NULL;
state->id_rx_wqes = NULL;
return (DDI_FAILURE);
@@ -3928,7 +4122,7 @@
rwqe = state->id_rx_wqes;
bufaddr = state->id_rx_bufs;
list = NULL;
- for (i = 0; i < state->id_num_rwqe; i++, rwqe++, bufaddr += len) {
+ for (i = 0; i < state->id_ud_num_rwqe; i++, rwqe++, bufaddr += len) {
rwqe->w_state = state;
rwqe->w_freemsg_cb.free_func = ibd_freemsg_cb;
rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
@@ -4001,8 +4195,10 @@
}
kmem_free(state->id_rx_queues, state->id_rx_nqueues *
sizeof (ibd_rx_queue_t));
- kmem_free(state->id_rx_wqes, state->id_num_rwqe * sizeof (ibd_rwqe_t));
- kmem_free(state->id_rx_bufs, state->id_num_rwqe * state->id_rx_buf_sz);
+ kmem_free(state->id_rx_wqes, state->id_ud_num_rwqe *
+ sizeof (ibd_rwqe_t));
+ kmem_free(state->id_rx_bufs, state->id_ud_num_rwqe *
+ state->id_rx_buf_sz);
state->id_rx_queues = NULL;
state->id_rx_wqes = NULL;
state->id_rx_bufs = NULL;
@@ -4017,7 +4213,7 @@
mutex_exit(&state->id_rx_free_list.dl_mutex);
return;
}
- ASSERT(state->id_rx_free_list.dl_cnt == state->id_num_rwqe);
+ ASSERT(state->id_rx_free_list.dl_cnt == state->id_ud_num_rwqe);
ibd_free_rx_copybufs(state);
state->id_rx_free_list.dl_cnt = 0;
state->id_rx_free_list.dl_head = NULL;
@@ -4182,6 +4378,15 @@
case IBT_SM_EVENT_MCG_CREATED:
case IBT_SM_EVENT_MCG_DELETED:
/*
+ * If it is a "deleted" event and we are in late hca
+ * init, nothing to do.
+ */
+ if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+ IBD_DRV_IN_LATE_HCA_INIT) && (code ==
+ IBT_SM_EVENT_MCG_DELETED)) {
+ break;
+ }
+ /*
* Common processing of creation/deletion traps.
* First check if the instance is being
* [de]initialized; back off then, without doing
@@ -4206,10 +4411,50 @@
{
ib_gid_t mgid = req->rq_gid;
ibt_subnet_event_code_t code = (ibt_subnet_event_code_t)req->rq_ptr;
+ int ret;
+ ib_pkey_t pkey = (mgid.gid_prefix >> 16) & 0xffff;
DPRINT(10, "ibd_async_trap : %d\n", code);
/*
+ * Check if we have already joined the IPoIB broadcast group for our
+ * PKEY. If joined, perform the rest of the operation.
+ * Else, the interface is not initialised. Do the initialisation here
+ * by calling ibd_start() and return.
+ */
+
+ if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+ IBD_DRV_IN_LATE_HCA_INIT) && (state->id_bgroup_present == 0) &&
+ (code == IBT_SM_EVENT_MCG_CREATED)) {
+ /*
+ * If we are in late HCA init and a notification for the
+ * creation of a MCG came in, check if it is the IPoIB MCG for
+ * this pkey. If not, return.
+ */
+ if ((mgid.gid_guid != IB_MGID_IPV4_LOWGRP_MASK) || (pkey !=
+ state->id_pkey)) {
+ ibd_async_done(state);
+ return;
+ }
+ ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
+ /*
+ * Check if there is still a necessity to start the interface.
+ * It is possible that the user attempted unplumb at just about
+ * the same time, and if unplumb succeeded, we have nothing to
+ * do.
+ */
+ if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+ IBD_DRV_IN_LATE_HCA_INIT) &&
+ ((ret = ibd_start(state)) != 0)) {
+ DPRINT(10, "ibd_async_trap: cannot start from late HCA "
+ "init, ret=%d", ret);
+ }
+ ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
+ ibd_async_done(state);
+ return;
+ }
+
+ /*
* Atomically search the nonmember and sendonlymember lists and
* delete.
*/
@@ -4250,6 +4495,9 @@
{
ibd_state_t *state = arg;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return (B_FALSE);
+
switch (cap) {
case MAC_CAPAB_HCKSUM: {
uint32_t *txflags = cap_data;
@@ -4299,6 +4547,759 @@
return (B_TRUE);
}
+/*
+ * MAC setprop callback: sets the IB link mode or delegates a private property.
+ */
+static int
+ibd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ ibd_state_t *state = arg;
+ int err = 0; /* value handed back to the caller; 0 means success */
+ uint32_t link_mode;
+
+ /* Cannot set properties on a port driver */
+ if (state->id_type == IBD_PORT_DRIVER) {
+ return (ENOTSUP);
+ }
+
+ switch (pr_num) {
+ case MAC_PROP_IB_LINKMODE:
+ if (state->id_mac_state & IBD_DRV_STARTED) {
+ err = EBUSY; /* link mode is refused while IBD_DRV_STARTED is set */
+ break;
+ }
+ if (pr_val == NULL) {
+ err = EINVAL;
+ break;
+ }
+ bcopy(pr_val, &link_mode, sizeof (link_mode));
+ if (link_mode != IBD_LINK_MODE_UD &&
+ link_mode != IBD_LINK_MODE_RC) {
+ err = EINVAL; /* only the UD and RC modes are accepted */
+ } else {
+ if (link_mode == IBD_LINK_MODE_RC) {
+ if (state->id_enable_rc) {
+ return (0); /* RC already enabled: nothing to do */
+ }
+ state->id_enable_rc = 1;
+ /* inform MAC framework of new MTU */
+ err = mac_maxsdu_update(state->id_mh,
+ state->rc_mtu - IPOIB_HDRSIZE);
+ } else {
+ if (!state->id_enable_rc) {
+ return (0); /* RC already disabled: nothing to do */
+ }
+ state->id_enable_rc = 0;
+ err = mac_maxsdu_update(state->id_mh,
+ state->id_mtu - IPOIB_HDRSIZE);
+ }
+ (void) ibd_record_capab(state); /* its result is deliberately ignored */
+ mac_capab_update(state->id_mh); /* NOTE(review): runs even if err != 0 */
+ }
+ break;
+ case MAC_PROP_PRIVATE: /* name-keyed properties go to ibd_set_priv_prop() */
+ err = ibd_set_priv_prop(state, pr_name,
+ pr_valsize, pr_val);
+ break;
+ default:
+ err = ENOTSUP;
+ break;
+ }
+ return (err);
+}
+/* MAC getprop callback: returns the link mode or a private property value. */
+static int
+ibd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ ibd_state_t *state = arg;
+ int err = 0;
+
+ switch (pr_num) {
+ case MAC_PROP_MTU: /* the one property not rejected for a port driver here */
+ break;
+ default:
+ if (state->id_type == IBD_PORT_DRIVER) {
+ return (ENOTSUP);
+ }
+ break;
+ }
+
+ switch (pr_num) {
+ case MAC_PROP_IB_LINKMODE: /* stored as a uint_t; pr_valsize is not consulted */
+ *(uint_t *)pr_val = state->id_enable_rc; /* NOTE(review): assumes RC/UD mode values are 1/0 - confirm */
+ break;
+ case MAC_PROP_PRIVATE:
+ err = ibd_get_priv_prop(state, pr_name, pr_valsize,
+ pr_val);
+ break;
+ default:
+ err = ENOTSUP; /* also reached for MAC_PROP_MTU, which has no case here */
+ break;
+ }
+ return (err);
+}
+/* MAC propinfo callback: publishes defaults, the MTU range and permissions. */
+static void
+ibd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ ibd_state_t *state = arg;
+
+ switch (pr_num) {
+ case MAC_PROP_IB_LINKMODE: {
+ mac_prop_info_set_default_uint32(prh, IBD_DEF_LINK_MODE);
+ break;
+ }
+ case MAC_PROP_MTU: {
+ uint32_t min, max;
+ if (state->id_type == IBD_PORT_DRIVER) { /* port driver: a true range */
+ min = 1500;
+ max = IBD_DEF_RC_MAX_SDU;
+ } else if (state->id_enable_rc) {
+ min = max = IBD_DEF_RC_MAX_SDU; /* RC enabled: one fixed value */
+ } else {
+ min = max = state->id_mtu - IPOIB_HDRSIZE; /* id_mtu less the IPoIB header */
+ }
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_range_uint32(prh, min, max);
+ break;
+ }
+ case MAC_PROP_PRIVATE: {
+ char valstr[64];
+ int value; /* default to publish; set by exactly one branch below */
+
+ if (strcmp(pr_name, "_ibd_broadcast_group") == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ return; /* no default string is published for this one */
+ } else if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
+ value = IBD_DEF_COALESCE_COMPLETIONS;
+ } else if (strcmp(pr_name,
+ "_ibd_create_broadcast_group") == 0) {
+ value = IBD_DEF_CREATE_BCAST_GROUP;
+ } else if (strcmp(pr_name, "_ibd_hash_size") == 0) {
+ value = IBD_DEF_HASH_SIZE;
+ } else if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
+ value = IBD_DEF_LSO_POLICY;
+ } else if (strcmp(pr_name, "_ibd_num_ah") == 0) {
+ value = IBD_DEF_NUM_AH;
+ } else if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
+ value = IBD_DEF_NUM_LSO_BUFS;
+ } else if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
+ value = IBD_DEF_RC_ENABLE_SRQ;
+ } else if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
+ value = IBD_DEF_RC_NUM_RWQE;
+ } else if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
+ value = IBD_DEF_RC_NUM_SRQ;
+ } else if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
+ value = IBD_DEF_RC_NUM_SWQE;
+ } else if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
+ value = IBD_DEF_RC_RX_COMP_COUNT;
+ } else if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
+ value = IBD_DEF_RC_RX_COMP_USEC;
+ } else if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
+ value = IBD_DEF_RC_RX_COPY_THRESH;
+ } else if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
+ value = IBD_DEF_RC_RX_RWQE_THRESH;
+ } else if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
+ value = IBD_DEF_RC_TX_COMP_COUNT;
+ } else if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
+ value = IBD_DEF_RC_TX_COMP_USEC;
+ } else if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
+ value = IBD_DEF_RC_TX_COPY_THRESH;
+ } else if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
+ value = IBD_DEF_UD_NUM_RWQE;
+ } else if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
+ value = IBD_DEF_UD_NUM_SWQE;
+ } else if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
+ value = IBD_DEF_UD_RX_COMP_COUNT;
+ } else if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
+ value = IBD_DEF_UD_RX_COMP_USEC;
+ } else if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
+ value = IBD_DEF_UD_TX_COMP_COUNT;
+ } else if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
+ value = IBD_DEF_UD_TX_COMP_USEC;
+ } else if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
+ value = IBD_DEF_UD_TX_COPY_THRESH;
+ } else {
+ return; /* unrecognised private name: publish nothing */
+ }
+
+ (void) snprintf(valstr, sizeof (valstr), "%d", value); /* defaults go out as decimal text */
+ mac_prop_info_set_default_str(prh, valstr);
+ break;
+ }
+ } /* switch (pr_num) */
+}
+
+/* Set one private ("_ibd_*") property given as a string; returns 0 or errno. */
+/* ARGSUSED2 */
+static int
+ibd_set_priv_prop(ibd_state_t *state, const char *pr_name,
+    uint_t pr_valsize, const void *pr_val)
+{
+	int err = 0;
+	long result;	/* parsed value; valid only when ddi_strtol() returns 0 */
+
+	if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 0 || result > 1) {
+			err = EINVAL;	/* unparsable, or not 0/1 */
+		} else {
+			state->id_allow_coalesce_comp_tuning = (result == 1) ?
+			    B_TRUE: B_FALSE;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);	/* refused once IBD_DRV_STARTED is set */
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 0 || result > 1) {
+			err = EINVAL;
+		} else {
+			state->id_create_broadcast_group = (result == 1) ?
+			    B_TRUE: B_FALSE;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_hash_size") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_HASH_SIZE || result > IBD_MAX_HASH_SIZE) {
+			err = EINVAL;
+		} else {
+			state->id_hash_size = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 0 || result > 1) {
+			err = EINVAL;
+		} else {
+			state->id_lso_policy = (result == 1) ?
+			    B_TRUE: B_FALSE;
+		}
+		mac_capab_update(state->id_mh);
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_num_ah") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_NUM_AH || result > IBD_MAX_NUM_AH) {
+			err = EINVAL;
+		} else {
+			state->id_num_ah = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (!state->id_lso_policy || !state->id_lso_capable) {
+			return (EINVAL);	/* LSO policy off or not capable */
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_NUM_LSO_BUFS ||
+		    result > IBD_MAX_NUM_LSO_BUFS) {
+			err = EINVAL;
+		} else {
+			state->id_num_lso_bufs = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 0 || result > 1) {
+			err = EINVAL;
+		} else {
+			state->rc_enable_srq = (result == 1) ?
+			    B_TRUE: B_FALSE;
+		}
+		if (!state->rc_enable_srq) {
+			state->id_rc_num_srq = 0;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_NUM_RWQE ||
+		    result > IBD_MAX_RC_NUM_RWQE) {
+			err = EINVAL;
+		} else {
+			state->id_rc_num_rwqe = (uint32_t)result;
+			if (state->id_allow_coalesce_comp_tuning &&
+			    state->id_rc_rx_comp_count > state->id_rc_num_rwqe)
+				state->id_rc_rx_comp_count =
+				    state->id_rc_num_rwqe;
+			if (state->id_rc_num_srq > state->id_rc_num_rwqe)
+				state->id_rc_num_srq =
+				    state->id_rc_num_rwqe - 1;
+			/*
+			 * If rx_rwqe_threshold is greater than the number of
+			 * rwqes, pull it back to 25% of number of rwqes.
+			 */
+			if (state->id_rc_rx_rwqe_thresh > state->id_rc_num_rwqe)
+				state->id_rc_rx_rwqe_thresh =
+				    (state->id_rc_num_rwqe >> 2);
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (!state->rc_enable_srq)
+			return (EINVAL);
+
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_NUM_SRQ ||
+		    result >= state->id_rc_num_rwqe) {
+			err = EINVAL;
+		} else
+			state->id_rc_num_srq = (uint32_t)result;
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_NUM_SWQE ||
+		    result > IBD_MAX_RC_NUM_SWQE) {
+			err = EINVAL;
+		} else {
+			state->id_rc_num_swqe = (uint32_t)result;
+			if (state->id_allow_coalesce_comp_tuning &&
+			    state->id_rc_tx_comp_count > state->id_rc_num_swqe)
+				state->id_rc_tx_comp_count =
+				    state->id_rc_num_swqe;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);  /* needs _ibd_coalesce_completions=1 */
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1 || result > state->id_rc_num_rwqe) {
+			err = EINVAL;
+		} else {
+			state->id_rc_rx_comp_count = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1) {
+			err = EINVAL;
+		} else {
+			state->id_rc_rx_comp_usec = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_RX_COPY_THRESH ||
+		    result > state->rc_mtu) {
+			err = EINVAL;
+		} else {
+			state->id_rc_rx_copy_thresh = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_RX_RWQE_THRESH ||
+		    result >= state->id_rc_num_rwqe) {
+			err = EINVAL;
+		} else {
+			state->id_rc_rx_rwqe_thresh = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1 || result > state->id_rc_num_swqe) {
+			err = EINVAL;
+		} else {
+			state->id_rc_tx_comp_count = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1) {
+			err = EINVAL;
+		} else {
+			state->id_rc_tx_comp_usec = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_RC_TX_COPY_THRESH ||
+		    result > state->rc_mtu) {
+			err = EINVAL;
+		} else {
+			state->id_rc_tx_copy_thresh = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_UD_NUM_RWQE ||
+		    result > IBD_MAX_UD_NUM_RWQE) {
+			err = EINVAL;
+		} else {
+			if (result > state->id_hca_max_chan_sz) {
+				state->id_ud_num_rwqe =	/* capped, not rejected */
+				    state->id_hca_max_chan_sz;
+			} else {
+				state->id_ud_num_rwqe = (uint32_t)result;
+			}
+			if (state->id_allow_coalesce_comp_tuning &&
+			    state->id_ud_rx_comp_count > state->id_ud_num_rwqe)
+				state->id_ud_rx_comp_count =
+				    state->id_ud_num_rwqe;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_UD_NUM_SWQE ||
+		    result > IBD_MAX_UD_NUM_SWQE) {
+			err = EINVAL;
+		} else {
+			if (result > state->id_hca_max_chan_sz) {
+				state->id_ud_num_swqe =	/* capped, not rejected */
+				    state->id_hca_max_chan_sz;
+			} else {
+				state->id_ud_num_swqe = (uint32_t)result;
+			}
+			if (state->id_allow_coalesce_comp_tuning &&
+			    state->id_ud_tx_comp_count > state->id_ud_num_swqe)
+				state->id_ud_tx_comp_count =
+				    state->id_ud_num_swqe;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1 || result > state->id_ud_num_rwqe) {
+			err = EINVAL;
+		} else {
+			state->id_ud_rx_comp_count = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1) {
+			err = EINVAL;
+		} else {
+			state->id_ud_rx_comp_usec = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1 || result > state->id_ud_num_swqe) {
+			err = EINVAL;
+		} else {
+			state->id_ud_tx_comp_count = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning) {
+			return (ENOTSUP);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < 1) {
+			err = EINVAL;
+		} else {
+			state->id_ud_tx_comp_usec = (uint32_t)result;
+		}
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED) {
+			return (EBUSY);
+		}
+		if (pr_val == NULL) {
+			return (EINVAL);
+		}
+		if (ddi_strtol(pr_val, (char **)NULL, 0, &result) != 0 ||
+		    result < IBD_MIN_UD_TX_COPY_THRESH ||
+		    result > IBD_MAX_UD_TX_COPY_THRESH) {
+			err = EINVAL;
+		} else {
+			state->id_ud_tx_copy_thresh = (uint32_t)result;
+		}
+		return (err);
+	}
+	return (ENOTSUP);	/* unrecognised private property name */
+}
+/* Format one private ("_ibd_*") property as a decimal string into pr_val. */
+static int
+ibd_get_priv_prop(ibd_state_t *state, const char *pr_name, uint_t pr_valsize,
+ void *pr_val)
+{
+ int err = ENOTSUP; /* stays ENOTSUP when pr_name matches nothing below */
+ int value; /* assigned by the matching branch; read only when err == 0 */
+
+ if (strcmp(pr_name, "_ibd_broadcast_group") == 0) {
+ value = state->id_bgroup_present; /* get-only: ibd_set_priv_prop() has no case for it */
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
+ value = state->id_allow_coalesce_comp_tuning;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0) {
+ value = state->id_create_broadcast_group;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_hash_size") == 0) {
+ value = state->id_hash_size;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
+ value = state->id_lso_policy;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_num_ah") == 0) {
+ value = state->id_num_ah;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
+ value = state->id_num_lso_bufs;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
+ value = state->rc_enable_srq;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
+ value = state->id_rc_num_rwqe;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
+ value = state->id_rc_num_srq;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
+ value = state->id_rc_num_swqe;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
+ value = state->id_rc_rx_comp_count;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
+ value = state->id_rc_rx_comp_usec;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
+ value = state->id_rc_rx_copy_thresh;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
+ value = state->id_rc_rx_rwqe_thresh;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
+ value = state->id_rc_tx_comp_count;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
+ value = state->id_rc_tx_comp_usec;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
+ value = state->id_rc_tx_copy_thresh;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
+ value = state->id_ud_num_rwqe;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
+ value = state->id_ud_num_swqe;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
+ value = state->id_ud_rx_comp_count;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
+ value = state->id_ud_rx_comp_usec;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
+ value = state->id_ud_tx_comp_count;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
+ value = state->id_ud_tx_comp_usec;
+ err = 0;
+ goto done;
+ }
+ if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
+ value = state->id_ud_tx_copy_thresh;
+ err = 0;
+ goto done;
+ }
+done:
+ if (err == 0) {
+ (void) snprintf(pr_val, pr_valsize, "%d", value); /* output bounded by pr_valsize */
+ }
+ return (err);
+}
+
static int
ibd_get_port_details(ibd_state_t *state)
{
@@ -4321,42 +5322,32 @@
}
/*
- * If the link already went down by the time we get here,
- * give up
- */
- if (port_infop->p_linkstate != IBT_PORT_ACTIVE) {
- mutex_exit(&state->id_link_mutex);
- ibt_free_portinfo(port_infop, port_infosz);
- DPRINT(10, "ibd_get_port_details: port is not active");
- return (ENETDOWN);
- }
-
- /*
* If the link is active, verify the pkey
*/
- if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
- state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
- mutex_exit(&state->id_link_mutex);
- ibt_free_portinfo(port_infop, port_infosz);
- DPRINT(10, "ibd_get_port_details: ibt_pkey2index "
- "failed, ret=%d", ret);
- return (ENONET);
- }
-
- state->id_mtu = (128 << port_infop->p_mtu);
- _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
- state->id_sgid = *port_infop->p_sgid_tbl;
- _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
- state->id_link_state = LINK_STATE_UP;
-
+ if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
+ if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
+ state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
+ state->id_link_state = LINK_STATE_DOWN;
+ } else {
+ state->id_link_state = LINK_STATE_UP;
+ }
+ state->id_mtu = (128 << port_infop->p_mtu);
+ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
+ state->id_sgid = *port_infop->p_sgid_tbl;
+ _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
+ /*
+ * Now that the port is active, record the port speed
+ */
+ state->id_link_speed = ibd_get_portspeed(state);
+ } else {
+ /* Make sure that these are handled in PORT_UP/CHANGE */
+ state->id_mtu = 0;
+ state->id_link_state = LINK_STATE_DOWN;
+ state->id_link_speed = 0;
+ }
mutex_exit(&state->id_link_mutex);
ibt_free_portinfo(port_infop, port_infosz);
- /*
- * Now that the port is active, record the port speed
- */
- state->id_link_speed = ibd_get_portspeed(state);
-
return (0);
}
@@ -4367,6 +5358,8 @@
ibt_cq_attr_t cq_attr;
ibt_status_t ret;
uint32_t real_size;
+ uint_t num_rwqe_change = 0;
+ uint_t num_swqe_change = 0;
ret = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
ASSERT(ret == IBT_SUCCESS);
@@ -4385,11 +5378,12 @@
/*
* Allocate Receive CQ.
*/
- if (hca_attrs.hca_max_cq_sz >= (state->id_num_rwqe + 1)) {
- cq_attr.cq_size = state->id_num_rwqe + 1;
+ if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_rwqe + 1)) {
+ cq_attr.cq_size = state->id_ud_num_rwqe + 1;
} else {
cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
- state->id_num_rwqe = cq_attr.cq_size - 1;
+ num_rwqe_change = state->id_ud_num_rwqe;
+ state->id_ud_num_rwqe = cq_attr.cq_size - 1;
}
if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
@@ -4399,8 +5393,8 @@
return (DDI_FAILURE);
}
- if ((ret = ibt_modify_cq(state->id_rcq_hdl,
- ibd_rxcomp_count, ibd_rxcomp_usec, 0)) != IBT_SUCCESS) {
+ if ((ret = ibt_modify_cq(state->id_rcq_hdl, state->id_ud_rx_comp_count,
+ state->id_ud_rx_comp_usec, 0)) != IBT_SUCCESS) {
DPRINT(10, "ibd_alloc_cqs: Receive CQ interrupt "
"moderation failed, ret=%d\n", ret);
}
@@ -4413,11 +5407,12 @@
/*
* Allocate Send CQ.
*/
- if (hca_attrs.hca_max_cq_sz >= (state->id_num_swqe + 1)) {
- cq_attr.cq_size = state->id_num_swqe + 1;
+ if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_swqe + 1)) {
+ cq_attr.cq_size = state->id_ud_num_swqe + 1;
} else {
cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
- state->id_num_swqe = cq_attr.cq_size - 1;
+ num_swqe_change = state->id_ud_num_swqe;
+ state->id_ud_num_swqe = cq_attr.cq_size - 1;
}
if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
@@ -4429,8 +5424,8 @@
(void) ibt_free_cq(state->id_rcq_hdl);
return (DDI_FAILURE);
}
- if ((ret = ibt_modify_cq(state->id_scq_hdl,
- ibd_txcomp_count, ibd_txcomp_usec, 0)) != IBT_SUCCESS) {
+ if ((ret = ibt_modify_cq(state->id_scq_hdl, state->id_ud_tx_comp_count,
+ state->id_ud_tx_comp_usec, 0)) != IBT_SUCCESS) {
DPRINT(10, "ibd_alloc_cqs: Send CQ interrupt "
"moderation failed, ret=%d\n", ret);
}
@@ -4443,13 +5438,13 @@
* Print message in case we could not allocate as many wqe's
* as was requested.
*/
- if (state->id_num_rwqe != IBD_NUM_RWQE) {
+ if (num_rwqe_change) {
ibd_print_warn(state, "Setting #rwqe = %d instead of default "
- "%d", state->id_num_rwqe, IBD_NUM_RWQE);
- }
- if (state->id_num_swqe != IBD_NUM_SWQE) {
+ "%d", state->id_ud_num_rwqe, num_rwqe_change);
+ }
+ if (num_swqe_change) {
ibd_print_warn(state, "Setting #swqe = %d instead of default "
- "%d", state->id_num_swqe, IBD_NUM_SWQE);
+ "%d", state->id_ud_num_swqe, num_swqe_change);
}
return (DDI_SUCCESS);
@@ -4471,8 +5466,8 @@
ud_alloc_attr.ud_hca_port_num = state->id_port;
ud_alloc_attr.ud_sizes.cs_sq_sgl = state->id_max_sqseg;
ud_alloc_attr.ud_sizes.cs_rq_sgl = IBD_MAX_RQSEG;
- ud_alloc_attr.ud_sizes.cs_sq = state->id_num_swqe;
- ud_alloc_attr.ud_sizes.cs_rq = state->id_num_rwqe;
+ ud_alloc_attr.ud_sizes.cs_sq = state->id_ud_num_swqe;
+ ud_alloc_attr.ud_sizes.cs_rq = state->id_ud_num_rwqe;
ud_alloc_attr.ud_qkey = state->id_mcinfo->mc_qkey;
ud_alloc_attr.ud_scq = state->id_scq_hdl;
ud_alloc_attr.ud_rcq = state->id_rcq_hdl;
@@ -4528,6 +5523,7 @@
state->id_link_state = LINK_STATE_UNKNOWN;
}
mutex_exit(&state->id_link_mutex);
+ bzero(&state->id_macaddr, sizeof (ipoib_mac_t));
mac_link_update(state->id_mh, state->id_link_state);
state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED);
@@ -4535,6 +5531,10 @@
state->id_mac_state &= (~IBD_DRV_STARTED);
}
+ if (progress & IBD_DRV_IN_LATE_HCA_INIT) {
+ state->id_mac_state &= (~IBD_DRV_IN_LATE_HCA_INIT);
+ }
+
/* Stop listen under Reliable Connected Mode */
if (progress & IBD_DRV_RC_LISTEN) {
ASSERT(state->id_enable_rc);
@@ -4626,7 +5626,7 @@
mutex_enter(&state->id_tx_rel_list.dl_mutex);
attempts = 10;
while (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt
- != state->id_num_swqe) {
+ != state->id_ud_num_swqe) {
if (--attempts == 0)
break;
mutex_exit(&state->id_tx_rel_list.dl_mutex);
@@ -4637,7 +5637,7 @@
}
ibt_set_cq_handler(state->id_scq_hdl, 0, 0);
if (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt !=
- state->id_num_swqe) {
+ state->id_ud_num_swqe) {
cmn_err(CE_WARN, "tx resources not freed\n");
}
mutex_exit(&state->id_tx_rel_list.dl_mutex);
@@ -4657,28 +5657,6 @@
state->id_mac_state &= (~IBD_DRV_SCQ_NOTIFY_ENABLED);
}
- if (progress & IBD_DRV_ASYNC_THR_CREATED) {
- /*
- * No new async requests will be posted since the device
- * link state has been marked as unknown; completion handlers
- * have been turned off, so Tx handler will not cause any
- * more IBD_ASYNC_REAP requests.
- *
- * Queue a request for the async thread to exit, which will
- * be serviced after any pending ones. This can take a while,
- * specially if the SM is unreachable, since IBMF will slowly
- * timeout each SM request issued by the async thread. Reap
- * the thread before continuing on, we do not want it to be
- * lingering in modunloaded code (or we could move the reap
- * to ibd_detach(), provided we keep track of the current
- * id_async_thrid somewhere safe).
- */
- ibd_queue_work_slot(state, &state->id_ah_req, IBD_ASYNC_EXIT);
- thread_join(state->id_async_thrid);
-
- state->id_mac_state &= (~IBD_DRV_ASYNC_THR_CREATED);
- }
-
if (progress & IBD_DRV_BCAST_GROUP_JOINED) {
/*
* Drop all residual full/non membership. This includes full
@@ -4811,26 +5789,37 @@
ibd_state_t *state = arg;
int ret;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return (EINVAL);
+
ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+ if (state->id_mac_state & IBD_DRV_IN_DELETION) {
+ ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+ return (EIO);
+ }
ret = ibd_start(state);
-
ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
-
return (ret);
}
static int
ibd_start(ibd_state_t *state)
{
- kthread_t *kht;
int err;
ibt_status_t ret;
+ int late_hca_init = 0;
if (state->id_mac_state & IBD_DRV_STARTED)
return (DDI_SUCCESS);
- if (atomic_inc_32_nv(&state->id_running) != 1) {
+ /*
+ * We do not increment the running flag when calling ibd_start() as
+ * a result of some event which moves the state away from late HCA
+ * initialization viz. MCG_CREATED, PORT_CHANGE or link availability.
+ */
+ if (!(state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
+ (atomic_inc_32_nv(&state->id_running) != 1)) {
DPRINT(10, "ibd_start: id_running is non-zero");
cmn_err(CE_WARN, "ibd_start: id_running was not 0\n");
atomic_dec_32(&state->id_running);
@@ -4838,22 +5827,31 @@
}
/*
- * Get port details; if we fail here, very likely the port
- * state is inactive or the pkey can't be verified.
+ * Get port details; if we fail here, something bad happened.
+ * Fail plumb.
*/
if ((err = ibd_get_port_details(state)) != 0) {
DPRINT(10, "ibd_start: ibd_get_port_details() failed");
goto start_fail;
}
+ /*
+ * If state->id_link_state is DOWN, it indicates that either the port
+ * is down, or the pkey is not available. In both cases, resort to late
+ * initialization. Register for subnet notices, and return success.
+ */
state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED;
+ if (state->id_link_state == LINK_STATE_DOWN) {
+ late_hca_init = 1;
+ goto late_hca_init_return;
+ }
/*
* Find the IPoIB broadcast group
*/
if (ibd_find_bgroup(state) != IBT_SUCCESS) {
- DPRINT(10, "ibd_start: ibd_find_bgroup() failed");
- err = ENOTACTIVE;
- goto start_fail;
+ /* Resort to late initialization */
+ late_hca_init = 1;
+ goto reg_snet_notices;
}
state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND;
@@ -4932,16 +5930,6 @@
state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED;
/*
- * Create the async thread; thread_create never fails.
- */
- kht = thread_create(NULL, 0, ibd_async_work, state, 0, &p0,
- TS_RUN, minclsyspri);
- _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_async_thrid))
- state->id_async_thrid = kht->t_did;
- _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_async_thrid))
- state->id_mac_state |= IBD_DRV_ASYNC_THR_CREATED;
-
- /*
* When we did mac_register() in ibd_attach(), we didn't register
* the real macaddr and we didn't have the true port mtu. Now that
* we're almost ready, set the local mac address and broadcast
@@ -4980,7 +5968,9 @@
}
state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED;
- /*
+reg_snet_notices:
+ /*
+ * In case of normal initialization sequence,
* Setup the subnet notices handler after we've initialized the acache/
* mcache and started the async thread, both of which are required for
* the trap handler to function properly.
@@ -4989,13 +5979,37 @@
* a mac_register() during attach so mac_tx_update() can be called
* if necessary without any problem), we can enable the trap handler
* to queue requests to the async thread.
- */
- ibt_register_subnet_notices(state->id_ibt_hdl,
- ibd_snet_notices_handler, state);
- mutex_enter(&state->id_trap_lock);
- state->id_trap_stop = B_FALSE;
- mutex_exit(&state->id_trap_lock);
- state->id_mac_state |= IBD_DRV_SM_NOTICES_REGISTERED;
+ *
+ * In case of late hca initialization, the subnet notices handler will
+ * only handle MCG created/deleted event. The action performed as part
+ * of handling these events is to start the interface. So, the
+ * acache/mcache initialization is not a necessity in such cases for
+ * registering the subnet notices handler. Also, if we are in
+ * ibd_start() as a result of, say, some event handling after entering
+ * late hca initialization phase no need to register again.
+ */
+ if ((state->id_mac_state & IBD_DRV_SM_NOTICES_REGISTERED) == 0) {
+ ibt_register_subnet_notices(state->id_ibt_hdl,
+ ibd_snet_notices_handler, state);
+ mutex_enter(&state->id_trap_lock);
+ state->id_trap_stop = B_FALSE;
+ mutex_exit(&state->id_trap_lock);
+ state->id_mac_state |= IBD_DRV_SM_NOTICES_REGISTERED;
+ }
+
+late_hca_init_return:
+ if (late_hca_init == 1) {
+ state->id_mac_state |= IBD_DRV_IN_LATE_HCA_INIT;
+ /*
+ * In case of late initialization, mark the link state as down,
+ * immaterial of the actual link state as reported in the
+ * port_info.
+ */
+ state->id_link_state = LINK_STATE_DOWN;
+ mac_unicst_update(state->id_mh, (uint8_t *)&state->id_macaddr);
+ mac_link_update(state->id_mh, state->id_link_state);
+ return (DDI_SUCCESS);
+ }
if (state->id_enable_rc) {
if (state->rc_enable_srq) {
@@ -5043,8 +6057,9 @@
* notifications to GLDv3 till we reach here in the initialization
* sequence.
*/
+ mac_link_update(state->id_mh, state->id_link_state);
+ state->id_mac_state &= ~IBD_DRV_IN_LATE_HCA_INIT;
state->id_mac_state |= IBD_DRV_STARTED;
- mac_link_update(state->id_mh, state->id_link_state);
return (DDI_SUCCESS);
@@ -5068,6 +6083,9 @@
{
ibd_state_t *state = (ibd_state_t *)arg;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return;
+
ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
(void) ibd_undo_start(state, state->id_link_state);
@@ -5084,6 +6102,9 @@
{
ibd_state_t *state = arg;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return (EINVAL);
+
/*
* Don't bother even comparing the macaddr if we haven't
* completed ibd_m_start().
@@ -5134,6 +6155,9 @@
ib_gid_t mgid;
ibd_req_t *req;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return (EINVAL);
+
/*
* If we haven't completed ibd_m_start(), async thread wouldn't
* have been started and id_bcaddr wouldn't be set, so there's
@@ -5289,6 +6313,9 @@
ibd_state_t *state = (ibd_state_t *)arg;
ibd_req_t *req;
+ if (state->id_type == IBD_PORT_DRIVER)
+ return (EINVAL);
+
/*
* Async thread wouldn't have been started if we haven't
* passed ibd_m_start()
@@ -5700,7 +6727,7 @@
* the maximum acceptable.
*/
if ((state->id_hca_res_lkey_capab) &&
- (pktsize > IBD_TX_COPY_THRESH) &&
+ (pktsize > state->id_ud_tx_copy_thresh) &&
(nmblks < state->id_max_sqseg_hiwm)) {
ibt_iov_t iov_arr[IBD_MAX_SQSEG];
ibt_iov_attr_t iov_attr;
@@ -6073,7 +7100,7 @@
* the "copy-threshold", and if the number of mp
* fragments is less than the maximum acceptable.
*/
- if (pktsize <= ibd_rc_tx_copy_thresh) {
+ if (pktsize <= state->id_rc_tx_copy_thresh) {
atomic_inc_64(&state->rc_xmt_small_pkt);
/*
* Only process unicast packet in Reliable Connected
@@ -6358,7 +7385,13 @@
ibd_state_t *state = (ibd_state_t *)arg;
mblk_t *next;
- if (state->id_link_state != LINK_STATE_UP) {
+ if (state->id_type == IBD_PORT_DRIVER) {
+ freemsgchain(mp);
+ return (NULL);
+ }
+
+ if ((state->id_link_state != LINK_STATE_UP) ||
+ !(state->id_mac_state & IBD_DRV_STARTED)) {
freemsgchain(mp);
mp = NULL;
}
@@ -6502,7 +7535,7 @@
* Post more here, if less than one fourth full.
*/
if (atomic_add_32_nv(&state->id_rx_list.dl_cnt, -num_polled) <
- (state->id_num_rwqe / 4))
+ (state->id_ud_num_rwqe / 4))
ibd_post_recv_intr(state);
}
}
@@ -7054,3 +8087,765 @@
bcopy(tmpbuf, ibd_lbuf+off, msglen); /* no lock needed for this */
}
#endif
+
+/*
+ * Create a new partition object (IBD_PARTITION_OBJ) on top of the port
+ * instance named in the request.
+ *
+ * karg points to an ibd_create_ioctl_t.  Unless ioc_force_create is set,
+ * the port must be active and its pkey table must already contain
+ * ioc_pkey.  A (port instance, pkey) pair may only be used by one
+ * partition.
+ *
+ * Returns 0 on success, after linking the new ibd_state_t at the head of
+ * the ibd_objlist_head list.  On failure returns an errno value; several
+ * failure paths also store a more specific IBD_* code in
+ * cmd->ibdioc.ioc_status.
+ */
+/* ARGSUSED */
+static int
+ibd_create_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ibd_create_ioctl_t	*cmd = karg;
+	ibd_state_t		*state, *port_state, *p;
+	int			i, err, rval = 0;
+	mac_register_t		*macp;
+	ibt_hca_portinfo_t	*pinfop = NULL;
+	ibt_status_t		ibt_status;
+	uint_t			psize, pinfosz;
+	boolean_t		force_create = B_FALSE;
+
+	cmd->ibdioc.ioc_status = 0;
+
+	if (cmd->ibdioc.ioc_port_inst < 0) {
+		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
+		return (EINVAL);
+	}
+	port_state = ddi_get_soft_state(ibd_list, cmd->ibdioc.ioc_port_inst);
+	if (port_state == NULL) {
+		DPRINT(10, "ibd_create_partition: failed to get state %d",
+		    cmd->ibdioc.ioc_port_inst);
+		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
+		return (EINVAL);
+	}
+
+	/* Limited PKeys not supported */
+	if (cmd->ioc_pkey <= IB_PKEY_INVALID_FULL) {
+		rval = EINVAL;
+		goto part_create_return;
+	}
+
+	if (cmd->ioc_force_create == 0) {
+		/*
+		 * Check if the port pkey table contains the pkey for which
+		 * this partition is being created.
+		 */
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			rval = EINVAL;
+			goto part_create_return;
+		}
+
+		if (pinfop->p_linkstate != IBT_PORT_ACTIVE) {
+			rval = ENETDOWN;
+			cmd->ibdioc.ioc_status = IBD_PORT_IS_DOWN;
+			goto part_create_return;
+		}
+
+		for (i = 0; i < pinfop->p_pkey_tbl_sz; i++) {
+			if (pinfop->p_pkey_tbl[i] == cmd->ioc_pkey) {
+				break;
+			}
+		}
+		if (i == pinfop->p_pkey_tbl_sz) {
+			rval = EINVAL;
+			cmd->ibdioc.ioc_status = IBD_PKEY_NOT_PRESENT;
+			goto part_create_return;
+		}
+	} else {
+		force_create = B_TRUE;
+	}
+
+	/* Refuse to create a second partition for the same port and pkey */
+	mutex_enter(&ibd_objlist_lock);
+	for (p = ibd_objlist_head; p; p = p->id_next) {
+		if ((p->id_port_inst == cmd->ibdioc.ioc_port_inst) &&
+		    (p->id_pkey == cmd->ioc_pkey)) {
+			mutex_exit(&ibd_objlist_lock);
+			rval = EEXIST;
+			cmd->ibdioc.ioc_status = IBD_PARTITION_EXISTS;
+			goto part_create_return;
+		}
+	}
+	mutex_exit(&ibd_objlist_lock);
+
+	state = kmem_zalloc(sizeof (ibd_state_t), KM_SLEEP);
+
+	state->id_type = IBD_PARTITION_OBJ;
+
+	state->id_plinkid = cmd->ioc_partid;
+	state->id_dlinkid = cmd->ibdioc.ioc_linkid;
+	state->id_port_inst = cmd->ibdioc.ioc_port_inst;
+
+	/* The partition inherits the identity of the underlying port */
+	state->id_dip = port_state->id_dip;
+	state->id_port = port_state->id_port;
+	state->id_pkey = cmd->ioc_pkey;
+	state->id_hca_guid = port_state->id_hca_guid;
+	state->id_port_guid = port_state->id_port_guid;
+	state->id_force_create = force_create;
+
+	mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);
+
+	if (ibd_part_attach(state, state->id_dip) != DDI_SUCCESS) {
+		rval = EIO;
+		cmd->ibdioc.ioc_status = IBD_NO_HW_RESOURCE;
+		goto fail;
+	}
+
+	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+		rval = EAGAIN;
+		goto fail;
+	}
+
+	macp->m_type_ident	= MAC_PLUGIN_IDENT_IB;
+	macp->m_dip		= port_state->id_dip;
+	macp->m_instance	= (uint_t)-1;
+	macp->m_driver		= state;
+	macp->m_src_addr	= (uint8_t *)&state->id_macaddr;
+	macp->m_callbacks	= &ibd_m_callbacks;
+	macp->m_min_sdu		= 0;
+	if (state->id_enable_rc) {
+		macp->m_max_sdu		= IBD_DEF_RC_MAX_SDU;
+	} else {
+		macp->m_max_sdu		= IBD_DEF_MAX_SDU;
+	}
+	macp->m_priv_props = ibd_priv_props;
+
+	err = mac_register(macp, &state->id_mh);
+	mac_free(macp);
+
+	if (err != 0) {
+		DPRINT(10, "ibd_create_partition: mac_register() failed %d",
+		    err);
+		rval = err;
+		goto fail;
+	}
+
+	err = dls_devnet_create(state->id_mh,
+	    cmd->ioc_partid, crgetzoneid(credp));
+	if (err != 0) {
+		DPRINT(10, "ibd_create_partition: dls_devnet_create() failed "
+		    "%d", err);
+		rval = err;
+		(void) mac_unregister(state->id_mh);
+		goto fail;
+	}
+
+	/*
+	 * Add the new partition state structure to the head of the list.
+	 * state was zero-allocated, so id_next is NULL for an empty list
+	 * either way.
+	 */
+	mutex_enter(&ibd_objlist_lock);
+	state->id_next = ibd_objlist_head;
+	ibd_objlist_head = state;
+	mutex_exit(&ibd_objlist_lock);
+
+part_create_return:
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	return (rval);
+
+fail:
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	(void) ibd_part_unattach(state);
+
+	/*
+	 * The lock and cv were initialized above; tear them down before
+	 * the memory is released, as ibd_delete_partition() does.
+	 */
+	cv_destroy(&state->id_macst_cv);
+	mutex_destroy(&state->id_macst_lock);
+
+	kmem_free(state, sizeof (ibd_state_t));
+	return (rval);
+}
+
+/*
+ * Tear down the partition object whose partition link id matches
+ * ioc_partid in the ibd_delete_ioctl_t request pointed to by karg.
+ *
+ * Returns ENOENT when no such partition is on the object list, the error
+ * from dls_devnet_destroy() or mac_disable() when either of them fails,
+ * EBUSY when the partition is started, is in late HCA initialization or
+ * cannot be unattached, and 0 once the state structure has been freed.
+ */
+/* ARGSUSED */
+static int
+ibd_delete_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ibd_delete_ioctl_t	*req = karg;
+	ibd_state_t		*part;
+	ibd_state_t		*before = NULL;
+	datalink_id_t		linkid;
+	int			rc;
+
+	mutex_enter(&ibd_objlist_lock);
+
+	/* Locate the partition, remembering its predecessor for unlinking */
+	for (part = ibd_objlist_head; part != NULL; part = part->id_next) {
+		if (part->id_plinkid == req->ioc_partid)
+			break;
+		before = part;
+	}
+	if (part == NULL) {
+		mutex_exit(&ibd_objlist_lock);
+		return (ENOENT);
+	}
+
+	rc = dls_devnet_destroy(part->id_mh, &linkid, B_TRUE);
+	if (rc != 0) {
+		DPRINT(10, "ibd_delete_partition: dls_devnet_destroy() failed "
+		    "%d", rc);
+		mutex_exit(&ibd_objlist_lock);
+		return (rc);
+	}
+
+	rc = mac_disable(part->id_mh);
+	if (rc != 0) {
+		/* Put the devnet back since the MAC could not be disabled */
+		(void) dls_devnet_create(part->id_mh, req->ioc_partid,
+		    crgetzoneid(credp));
+		mutex_exit(&ibd_objlist_lock);
+		return (rc);
+	}
+
+	/*
+	 * ibd_part_unattach() may only run once we know the instance has
+	 * not been started and is not in late hca init mode; the flag
+	 * checks therefore short-circuit ahead of the unattach call.
+	 */
+	ibd_set_mac_progress(part, IBD_DRV_DELETE_IN_PROGRESS);
+	if ((part->id_mac_state &
+	    (IBD_DRV_STARTED | IBD_DRV_IN_LATE_HCA_INIT)) != 0 ||
+	    ibd_part_unattach(part) != DDI_SUCCESS) {
+		ibd_clr_mac_progress(part, IBD_DRV_DELETE_IN_PROGRESS);
+		mutex_exit(&ibd_objlist_lock);
+		return (EBUSY);
+	}
+	part->id_mac_state |= IBD_DRV_IN_DELETION;
+	ibd_clr_mac_progress(part, IBD_DRV_DELETE_IN_PROGRESS);
+
+	/* Unlink the partition state structure from the object list */
+	if (before != NULL)
+		before->id_next = part->id_next;
+	else
+		ibd_objlist_head = part->id_next;
+	mutex_exit(&ibd_objlist_lock);
+
+	/* A failed unregister is only logged; deletion still completes */
+	rc = mac_unregister(part->id_mh);
+	if (rc != 0) {
+		DPRINT(10, "ibd_delete_partition: mac_unregister() failed %d",
+		    rc);
+	}
+
+	cv_destroy(&part->id_macst_cv);
+	mutex_destroy(&part->id_macst_lock);
+
+	kmem_free(part, sizeof (ibd_state_t));
+
+	return (0);
+}
+
+/*
+ * Return information about a partition or a port to the caller.
+ *
+ * arg is the caller's address of the request.  Its leading ibd_ioctl_t is
+ * copied in first and ioc_info_cmd selects the sub-command:
+ *
+ *   IBD_INFO_CMD_IBPART	describe the partition whose partition link
+ *				id equals ioc_linkid (ibpart_ioctl_t).
+ *   IBD_INFO_CMD_IBPORT	describe port instance ioc_port_inst and copy
+ *				its pkey table out to the caller-supplied
+ *				ioc_pkeys buffer; the caller's
+ *				ioc_pkey_tbl_sz must equal the port's pkey
+ *				table size.
+ *   IBD_INFO_CMD_PKEYTBLSZ	describe the port and report the size of its
+ *				pkey table in ioc_pkey_tbl_sz.
+ *
+ * Returns 0 on success, or EFAULT, EINVAL or ENOENT.  Any port info
+ * obtained from ibt_query_hca_ports() is released on every exit path.
+ */
+/* ARGSUSED */
+static int
+ibd_get_partition_info(void *karg, intptr_t arg, int mode, cred_t *cred,
+    int *rvalp)
+{
+	ibd_ioctl_t		cmd;
+	ibpart_ioctl_t		partioc;
+	ibport_ioctl_t		portioc;
+#ifdef _MULTI_DATAMODEL
+	ibport_ioctl32_t	portioc32;
+#endif
+	ibd_state_t		*state, *port_state;
+	int			size;
+	ibt_hca_portinfo_t	*pinfop = NULL;
+	ibt_status_t		ibt_status;
+	uint_t			psize, pinfosz;
+	int			rval = 0;
+
+	size = sizeof (ibd_ioctl_t);
+	if (ddi_copyin((void *)arg, &cmd, size, mode)) {
+		return (EFAULT);
+	}
+	cmd.ioc_status = 0;
+	switch (cmd.ioc_info_cmd) {
+	case IBD_INFO_CMD_IBPART:
+		size = sizeof (ibpart_ioctl_t);
+		if (ddi_copyin((void *)arg, &partioc, size, mode)) {
+			return (EFAULT);
+		}
+
+		mutex_enter(&ibd_objlist_lock);
+		/* Find the ibd state structure corresponding to the partition */
+		for (state = ibd_objlist_head; state; state = state->id_next) {
+			if (state->id_plinkid == cmd.ioc_linkid) {
+				break;
+			}
+		}
+
+		if (state == NULL) {
+			mutex_exit(&ibd_objlist_lock);
+			return (ENOENT);
+		}
+
+		partioc.ibdioc.ioc_linkid = state->id_dlinkid;
+		partioc.ibdioc.ioc_port_inst = state->id_port_inst;
+		partioc.ibdioc.ioc_portnum = state->id_port;
+		partioc.ibdioc.ioc_hcaguid = state->id_hca_guid;
+		partioc.ibdioc.ioc_portguid = state->id_port_guid;
+		partioc.ibdioc.ioc_status = 0;
+		partioc.ioc_partid = state->id_plinkid;
+		partioc.ioc_pkey = state->id_pkey;
+		partioc.ioc_force_create = state->id_force_create;
+
+		/*
+		 * Everything needed now lives in the local partioc, so the
+		 * list lock is dropped before copying out to the caller.
+		 */
+		mutex_exit(&ibd_objlist_lock);
+
+		if (ddi_copyout((void *)&partioc, (void *)arg, size, mode)) {
+			return (EFAULT);
+		}
+
+		break;
+
+	case IBD_INFO_CMD_IBPORT:
+		if ((cmd.ioc_port_inst < 0) || ((port_state =
+		    ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
+			DPRINT(10, "ibd_get_partition_info: failed to get"
+			    " state %d", cmd.ioc_port_inst);
+			size = sizeof (ibd_ioctl_t);
+			cmd.ioc_status = IBD_INVALID_PORT_INST;
+			if (ddi_copyout((void *)&cmd, (void *)arg, size,
+			    mode)) {
+				return (EFAULT);
+			}
+			return (EINVAL);
+		}
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			/* Leave via fail: so any port info is released */
+			rval = EINVAL;
+			goto fail;
+		}
+#ifdef _MULTI_DATAMODEL
+		switch (ddi_model_convert_from(mode & FMODELS)) {
+		case DDI_MODEL_ILP32: {
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc32.ibdioc.ioc_status = 0;
+			portioc32.ibdioc.ioc_portnum = port_state->id_port;
+			portioc32.ibdioc.ioc_hcaguid =
+			    port_state->id_hca_guid;
+			portioc32.ibdioc.ioc_portguid =
+			    port_state->id_port_guid;
+			if (portioc32.ioc_pkey_tbl_sz !=
+			    pinfop->p_pkey_tbl_sz) {
+				/* Report the mismatch through ioc_status */
+				rval = EINVAL;
+				size = sizeof (ibd_ioctl_t);
+				portioc32.ibdioc.ioc_status =
+				    IBD_INVALID_PKEY_TBL_SIZE;
+				if (ddi_copyout((void *)&portioc32.ibdioc,
+				    (void *)arg, size, mode)) {
+					rval = EFAULT;
+					goto fail;
+				}
+				goto fail;
+			}
+			size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+			if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+			    (void *)(uintptr_t)portioc32.ioc_pkeys, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyout((void *)&portioc32, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+		case DDI_MODEL_NONE:
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc.ibdioc.ioc_status = 0;
+			portioc.ibdioc.ioc_portnum = port_state->id_port;
+			portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+			portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+			if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
+				/* Report the mismatch through ioc_status */
+				rval = EINVAL;
+				size = sizeof (ibd_ioctl_t);
+				portioc.ibdioc.ioc_status =
+				    IBD_INVALID_PKEY_TBL_SIZE;
+				if (ddi_copyout((void *)&portioc.ibdioc,
+				    (void *)arg, size, mode)) {
+					rval = EFAULT;
+					goto fail;
+				}
+				goto fail;
+			}
+			size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+			if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+			    (void *)(portioc.ioc_pkeys), size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyout((void *)&portioc, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+#else /* ! _MULTI_DATAMODEL */
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		portioc.ibdioc.ioc_status = 0;
+		portioc.ibdioc.ioc_portnum = port_state->id_port;
+		portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+		portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+		if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
+			/* Report the mismatch through ioc_status */
+			rval = EINVAL;
+			size = sizeof (ibd_ioctl_t);
+			portioc.ibdioc.ioc_status = IBD_INVALID_PKEY_TBL_SIZE;
+			if (ddi_copyout((void *)&portioc.ibdioc, (void *)arg,
+			    size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			goto fail;
+		}
+		size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+		if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+		    (void *)(portioc.ioc_pkeys), size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyout((void *)&portioc, (void *)arg, size,
+		    mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+#endif /* _MULTI_DATAMODEL */
+
+		break;
+
+	case IBD_INFO_CMD_PKEYTBLSZ:
+		if ((cmd.ioc_port_inst < 0) || ((port_state =
+		    ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
+			DPRINT(10, "ibd_get_partition_info: failed to get"
+			    " state %d", cmd.ioc_port_inst);
+			size = sizeof (ibd_ioctl_t);
+			cmd.ioc_status = IBD_INVALID_PORT_INST;
+			if (ddi_copyout((void *)&cmd, (void *)arg, size,
+			    mode)) {
+				return (EFAULT);
+			}
+			return (EINVAL);
+		}
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			/* Leave via fail: so any port info is released */
+			rval = EINVAL;
+			goto fail;
+		}
+#ifdef _MULTI_DATAMODEL
+		switch (ddi_model_convert_from(mode & FMODELS)) {
+		case DDI_MODEL_ILP32: {
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc32.ibdioc.ioc_status = 0;
+			portioc32.ibdioc.ioc_portnum = port_state->id_port;
+			portioc32.ibdioc.ioc_hcaguid =
+			    port_state->id_hca_guid;
+			portioc32.ibdioc.ioc_portguid =
+			    port_state->id_port_guid;
+			portioc32.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+			if (ddi_copyout((void *)&portioc32, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+		case DDI_MODEL_NONE:
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc.ibdioc.ioc_status = 0;
+			portioc.ibdioc.ioc_portnum = port_state->id_port;
+			portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+			portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+			portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+			if (ddi_copyout((void *)&portioc, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+#else /* ! _MULTI_DATAMODEL */
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		portioc.ibdioc.ioc_status = 0;
+		portioc.ibdioc.ioc_portnum = port_state->id_port;
+		portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+		portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+		portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+		if (ddi_copyout((void *)&portioc, (void *)arg, size,
+		    mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+#endif /* _MULTI_DATAMODEL */
+		break;
+
+	default:
+		return (EINVAL);
+
+	} /* switch (cmd.ioc_info_cmd) */
+fail:
+	/* Common exit: release the port info if a query allocated it */
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	return (rval);
+}
+
+/*
+ * IBTF async event callback that tracks the link state of a port.
+ *
+ * arg is the ibd_state_t the handler was registered with.  On
+ * IBT_EVENT_PORT_UP and IBT_ERROR_PORT_DOWN the port is re-queried and,
+ * if its link state differs from the cached id_link_state, the cache is
+ * updated and the MAC layer is notified.  Every other event code is
+ * ignored.
+ */
+/* ARGSUSED */
+static void
+ibdpd_async_handler(void *arg, ibt_hca_hdl_t hca_hdl,
+    ibt_async_code_t code, ibt_async_event_t *event)
+{
+	ibd_state_t	*port = (ibd_state_t *)arg;
+	link_state_t	current;
+
+	/* Only port up/down transitions are of interest here */
+	if (code != IBT_EVENT_PORT_UP && code != IBT_ERROR_PORT_DOWN)
+		return;
+
+	/* If the port cannot be queried, keep the cached link state */
+	if (ibd_get_port_state(port, &current) != 0)
+		return;
+
+	if (port->id_link_state == current)
+		return;
+
+	port->id_link_state = current;
+	mac_link_update(port->id_mh, current);
+}
+
+/*
+ * Query the HCA for this instance's port and report its link state.
+ *
+ * As a side effect the cached source GID (id_sgid) and link speed
+ * (id_link_speed) in state are refreshed.  *lstate is set to
+ * LINK_STATE_UP when the port is IBT_PORT_ACTIVE and to LINK_STATE_DOWN
+ * otherwise.
+ *
+ * Returns 0 on success, or -1 (with state and *lstate left untouched)
+ * when the query fails or does not describe exactly one port.
+ */
+static int
+ibd_get_port_state(ibd_state_t *state, link_state_t *lstate)
+{
+	ibt_hca_portinfo_t *port_infop = NULL;
+	uint_t psize, port_infosz;
+	ibt_status_t ret;
+
+	ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
+	    &port_infop, &psize, &port_infosz);
+	if ((ret != IBT_SUCCESS) || (psize != 1)) {
+		/*
+		 * A successful query that returned an unexpected number of
+		 * ports has still allocated the port info; release it.
+		 */
+		if ((ret == IBT_SUCCESS) && (port_infop != NULL))
+			ibt_free_portinfo(port_infop, port_infosz);
+		return (-1);
+	}
+
+	state->id_sgid = *port_infop->p_sgid_tbl;
+	state->id_link_speed = ibd_get_portspeed(state);
+
+	if (port_infop->p_linkstate == IBT_PORT_ACTIVE)
+		*lstate = LINK_STATE_UP;
+	else
+		*lstate = LINK_STATE_DOWN;
+
+	ibt_free_portinfo(port_infop, port_infosz);
+	return (0);
+}
+
+/*
+ * Attach a port driver instance (IBD_PORT_DRIVER) for dip.
+ *
+ * Allocates the per-instance soft state, reads the "port-number",
+ * "hca-guid" and "port-guid" properties, attaches to IBTL, opens the HCA,
+ * samples the port's link state and registers with the MAC layer.  Each
+ * completed step is recorded in id_mac_state so ibd_port_unattach() can
+ * undo exactly what was done.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE after releasing everything that was
+ * set up, including the soft state.
+ */
+static int
+ibd_port_attach(dev_info_t *dip)
+{
+	ibd_state_t		*state;
+	link_state_t		lstate;
+	int			instance;
+	ibt_status_t		ret;
+
+	/*
+	 * Allocate softstate structure
+	 */
+	instance = ddi_get_instance(dip);
+	if (ddi_soft_state_zalloc(ibd_list, instance) == DDI_FAILURE) {
+		DPRINT(10, "ibd_attach: ddi_soft_state_zalloc() failed");
+		return (DDI_FAILURE);
+	}
+
+	state = ddi_get_soft_state(ibd_list, instance);
+
+	state->id_dip = dip;
+	state->id_type = IBD_PORT_DRIVER;
+
+	/*
+	 * From here on every failure must go through "done" so that the
+	 * soft state allocated above is freed again.
+	 */
+	if ((state->id_port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+	    "port-number", 0)) == 0) {
+		DPRINT(10, "ibd_attach: invalid port number (%d)",
+		    state->id_port);
+		goto done;
+	}
+	if ((state->id_hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
+	    "hca-guid", 0)) == 0) {
+		DPRINT(10, "ibd_attach: hca has invalid guid (0x%llx)",
+		    state->id_hca_guid);
+		goto done;
+	}
+	if ((state->id_port_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
+	    "port-guid", 0)) == 0) {
+		DPRINT(10, "ibd_attach: port has invalid guid (0x%llx)",
+		    state->id_port_guid);
+		goto done;
+	}
+
+	/*
+	 * Attach to IBTL
+	 */
+	if ((ret = ibt_attach(&ibdpd_clnt_modinfo, dip, state,
+	    &state->id_ibt_hdl)) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d", ret);
+		goto done;
+	}
+
+	state->id_mac_state |= IBD_DRV_IBTL_ATTACH_DONE;
+
+	if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
+	    &state->id_hca_hdl)) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: ibt_open_hca() failed, ret=%d", ret);
+		goto done;
+	}
+	state->id_mac_state |= IBD_DRV_HCA_OPENED;
+
+	/* Update link status */
+
+	if (ibd_get_port_state(state, &lstate) != 0) {
+		DPRINT(10, "ibd_attach: ibd_get_port_state() failed");
+		goto done;
+	}
+	state->id_link_state = lstate;
+	/*
+	 * Register ibd interfaces with the Nemo framework
+	 */
+	if (ibd_register_mac(state, dip) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: failed in ibd_register_mac()");
+		goto done;
+	}
+	state->id_mac_state |= IBD_DRV_MAC_REGISTERED;
+
+	mac_link_update(state->id_mh, lstate);
+
+	return (DDI_SUCCESS);
+done:
+	/* Undo the completed steps and free the soft state */
+	(void) ibd_port_unattach(state, dip);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Undo ibd_port_attach() for a port driver instance.
+ *
+ * The steps to reverse are taken from the id_mac_state bits as they were
+ * on entry: MAC registration, the open HCA and the IBTL attachment are
+ * released in that order, each bit being cleared as its step is undone.
+ * Failures to close the HCA or detach from IBTL are only warned about.
+ * The instance's soft state is always freed, so state must not be used
+ * after this call.  Always returns DDI_SUCCESS.
+ */
+static int
+ibd_port_unattach(ibd_state_t *state, dev_info_t *dip)
+{
+	const uint32_t	completed = state->id_mac_state;
+	ibt_status_t	status;
+
+	if ((completed & IBD_DRV_MAC_REGISTERED) != 0) {
+		(void) mac_unregister(state->id_mh);
+		state->id_mac_state &= ~IBD_DRV_MAC_REGISTERED;
+	}
+
+	if ((completed & IBD_DRV_HCA_OPENED) != 0) {
+		status = ibt_close_hca(state->id_hca_hdl);
+		if (status != IBT_SUCCESS) {
+			ibd_print_warn(state, "failed to close "
+			    "HCA device, ret=%d", status);
+		}
+		state->id_hca_hdl = NULL;
+		state->id_mac_state &= ~IBD_DRV_HCA_OPENED;
+	}
+
+	if ((completed & IBD_DRV_IBTL_ATTACH_DONE) != 0) {
+		status = ibt_detach(state->id_ibt_hdl);
+		if (status != IBT_SUCCESS) {
+			ibd_print_warn(state,
+			    "ibt_detach() failed, ret=%d", status);
+		}
+		state->id_ibt_hdl = NULL;
+		state->id_mac_state &= ~IBD_DRV_IBTL_ATTACH_DONE;
+	}
+
+	/* Release the per-instance soft state last */
+	ddi_soft_state_free(ibd_list, ddi_get_instance(dip));
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Look up the partition whose partition link id is linkid and copy its
+ * attributes into *attr.
+ *
+ * The object list is searched and read under ibd_objlist_lock.  Returns
+ * IBT_SUCCESS when the partition is found, or IBT_NO_SUCH_OBJECT (with
+ * *attr left untouched) when it is not.
+ */
+ibt_status_t
+ibd_get_part_attr(datalink_id_t linkid, ibt_part_attr_t *attr)
+{
+	ibd_state_t	*part;
+	ibt_status_t	result = IBT_NO_SUCH_OBJECT;
+
+	mutex_enter(&ibd_objlist_lock);
+
+	/* Walk the list until the matching partition link id turns up */
+	part = ibd_objlist_head;
+	while (part != NULL && part->id_plinkid != linkid)
+		part = part->id_next;
+
+	if (part != NULL) {
+		attr->pa_dlinkid = part->id_dlinkid;
+		attr->pa_plinkid = part->id_plinkid;
+		attr->pa_port = part->id_port;
+		attr->pa_hca_guid = part->id_hca_guid;
+		attr->pa_port_guid = part->id_port_guid;
+		attr->pa_pkey = part->id_pkey;
+		result = IBT_SUCCESS;
+	}
+
+	mutex_exit(&ibd_objlist_lock);
+
+	return (result);
+}
+
+/*
+ * Return the attributes of every partition on the object list.
+ *
+ * *nparts receives the number of partitions.  When that is non-zero,
+ * *attr_list receives an array of *nparts ibt_part_attr_t entries
+ * allocated with kmem_alloc(KM_SLEEP); this function does not free it.
+ * With no partitions *attr_list is set to NULL.  The list is counted and
+ * copied under a single hold of ibd_objlist_lock.  Always returns
+ * IBT_SUCCESS.
+ */
+ibt_status_t
+ibd_get_all_part_attr(ibt_part_attr_t **attr_list, int *nparts)
+{
+	ibd_state_t	*part;
+	ibt_part_attr_t	*out;
+	int		count = 0;
+	int		idx = 0;
+
+	mutex_enter(&ibd_objlist_lock);
+
+	/* First pass: size the result array */
+	part = ibd_objlist_head;
+	while (part != NULL) {
+		count++;
+		part = part->id_next;
+	}
+
+	*nparts = count;
+	if (count == 0) {
+		*attr_list = NULL;
+		mutex_exit(&ibd_objlist_lock);
+		return (IBT_SUCCESS);
+	}
+
+	*attr_list = kmem_alloc(sizeof (ibt_part_attr_t) * count, KM_SLEEP);
+
+	/* Second pass: fill one entry per partition, in list order */
+	for (part = ibd_objlist_head; part != NULL; part = part->id_next) {
+		/* The list cannot have grown while the lock is held */
+		ASSERT(idx < count);
+		out = &(*attr_list)[idx++];
+		out->pa_dlinkid = part->id_dlinkid;
+		out->pa_plinkid = part->id_plinkid;
+		out->pa_port = part->id_port;
+		out->pa_hca_guid = part->id_hca_guid;
+		out->pa_port_guid = part->id_port_guid;
+		out->pa_pkey = part->id_pkey;
+	}
+
+	mutex_exit(&ibd_objlist_lock);
+	return (IBT_SUCCESS);
+}
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.conf Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#########################################################################
-#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# Configuration file for the ibd driver.
-#
-
-#
-# Reliable Connected mode (RC) can be enabled or disabled using
-# enable_rc property.
-#
-# 1: unicast packets will be sent over Reliable Connected Mode
-# 0: unicast packets will be sent over Unreliable Datagram Mode
-#
-# Each element in the list below maps to the corresponding ibd
-# instance; the first element is for ibd instance 0, the second
-# element is for instance 1 and so on.
-#
-enable_rc=1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1;
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -55,72 +54,7 @@
#include <sys/ib/clients/ibd/ibd.h>
extern ibd_global_state_t ibd_gstate;
-
-/* Per-interface tunables (for developers) */
-extern uint_t ibd_rc_tx_copy_thresh;
-/*
- * ibd_rc_rx_copy_thresh
- * If (the size of incoming buffer <= ibd_rc_rx_copy_thresh), ibd will
- * attempt to allocate a buffer and do a bcopy of the incoming data into
- * the alocated buffer.
- *
- * ibd_rc_rx_rwqe_thresh
- * If (the number of available rwqe < ibd_rc_rx_rwqe_thresh), ibd will
- * attempt to allocate a buffer and do a bcopy of the incoming data into
- * the allocated buffer.
- */
-uint_t ibd_rc_rx_copy_thresh = 0x1000;
-uint_t ibd_rc_rx_rwqe_thresh = 0x200; /* old is 32; */
-
-/*
- * ibd_rc_num_swqe
- * 1) Send CQ size = ibd_rc_num_swqe
- * 2) The send queue size = ibd_rc_num_swqe -1
- * 3) Number of pre-allocated Tx buffers for ibt_post_send() =
- * ibd_rc_num_swqe - 1.
- */
-uint_t ibd_rc_num_swqe = 0x1ff;
-
-/*
- * ibd_rc_num_rwqe
- * 1) For non-SRQ, we pre-post ibd_rc_num_rwqe number of WRs
- * via ibt_post_receive() for receive queue of each RC channel.
- * 2) For SRQ and non-SRQ, receive CQ size = ibd_rc_num_rwqe
- */
-uint_t ibd_rc_num_rwqe = 0x7ff;
-
-/*
- * For SRQ
- * If using SRQ, we allocate ibd_rc_num_srq number of buffers (the size of
- * each buffer is equal to RC mtu). And post them by ibt_post_srq().
- *
- * ibd_rc_num_srq should not be larger than ibd_rc_num_rwqe, otherwise
- * it will cause a bug with the following warnings:
- * NOTICE: hermon0: Device Error: EQE cq overrun or protection error
- * NOTICE: hermon0: Device Error: EQE local work queue catastrophic error
- * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff catastrophic
- * channel error
- * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff completion queue
- * error
- */
-uint_t ibd_rc_num_srq = 0x7fe;
-
-boolean_t ibd_rc_enable_cq_moderation = B_TRUE;
-
-/*
- * Send CQ moderation parameters
- */
-uint_t ibd_rc_txcomp_count = 10;
-uint_t ibd_rc_txcomp_usec = 300;
-
-/*
- * Receive CQ moderation parameters
- */
-uint_t ibd_rc_rxcomp_count = 4;
-uint_t ibd_rc_rxcomp_usec = 10;
-
uint_t ibd_rc_tx_softintr = 1;
-
/*
* If the number of WRs in receive queue of each RC connection less than
* IBD_RC_RX_WR_THRESHOLD, we will post more receive WRs into it.
@@ -362,47 +296,6 @@
mutex_exit(&ace->tx_too_big_mutex);
}
-void
-ibd_rc_get_conf(ibd_state_t *state)
-{
- int *props;
- uint_t num_props;
- int instance;
-
- instance = ddi_get_instance(state->id_dip);
-
- /*
- * Get the array of "enable_rc" properties from "ibd.conf" file
- */
- if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->id_dip,
- DDI_PROP_DONTPASS, "enable_rc", &props, &num_props)
- == DDI_PROP_SUCCESS) {
- if (instance < num_props) {
- if (props[instance] == 1) {
- state->id_enable_rc = B_TRUE;
- } else {
- state->id_enable_rc = B_FALSE;
- }
- } else {
- /* not enough properties configured */
- state->id_enable_rc = B_FALSE;
- DPRINT(40, "ibd_rc_get_conf: Not enough "
- "enable_rc values in ibd.conf,"
- " disable RC mode, instance=%d", instance);
- }
-
- /* free memory allocated for properties */
- ddi_prop_free(props);
- } else {
- state->id_enable_rc = B_FALSE;
- DPRINT(30, "ibd_rc_get_conf: fail to find "
- "enable_rc in ibd.conf, disable RC mode");
- }
-
- state->rc_mtu = 65524;
- state->rc_enable_srq = B_TRUE;
-}
-
#ifdef DEBUG
/*
* ibd_rc_update_stats - update driver private kstat counters
@@ -479,12 +372,15 @@
{
kstat_t *ksp;
ibd_rc_stat_t *ibd_rc_ksp;
+ char stat_name[32];
+ int inst;
/*
* Create and init kstat
*/
- ksp = kstat_create("ibd", ddi_get_instance(state->id_dip),
- "statistics", "net", KSTAT_TYPE_NAMED,
+ inst = ddi_get_instance(state->id_dip);
+ (void) snprintf(stat_name, 31, "statistics%d_%x", inst, state->id_pkey);
+ ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED,
sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
if (ksp == NULL) {
@@ -611,11 +507,11 @@
/* Allocate IB structures for a new RC channel. */
if (is_tx_chan) {
- chan->scq_size = ibd_rc_num_swqe;
+ chan->scq_size = state->id_rc_num_swqe;
chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
} else {
chan->scq_size = IBD_RC_MIN_CQ_SIZE;
- chan->rcq_size = ibd_rc_num_rwqe;
+ chan->rcq_size = state->id_rc_num_rwqe;
}
cq_atts.cq_size = chan->scq_size;
cq_atts.cq_sched = NULL;
@@ -629,12 +525,10 @@
goto alloc_scq_err;
} /* if failure to alloc cq */
- if (ibd_rc_enable_cq_moderation) {
- if (ibt_modify_cq(chan->scq_hdl, ibd_rc_txcomp_count,
- ibd_rc_txcomp_usec, 0) != IBT_SUCCESS) {
- ibd_print_warn(state, "ibd_rc_alloc_chan: Send CQ "
- "interrupt moderation failed");
- }
+ if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
+ state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
+ ibd_print_warn(state, "ibd_rc_alloc_chan: Send CQ "
+ "interrupt moderation failed");
}
ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
@@ -652,13 +546,12 @@
goto alloc_rcq_err;
} /* if failure to alloc cq */
- if (ibd_rc_enable_cq_moderation) {
- if (ibt_modify_cq(chan->rcq_hdl, ibd_rc_rxcomp_count,
- ibd_rc_rxcomp_usec, 0) != IBT_SUCCESS) {
- ibd_print_warn(state, "ibd_rc_alloc_chan: Receive CQ "
- "interrupt moderation failed");
- }
+ if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
+ state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
+ ibd_print_warn(state, "ibd_rc_alloc_chan: Receive CQ "
+ "interrupt moderation failed");
}
+
ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
(void *)(uintptr_t)chan);
@@ -978,7 +871,7 @@
ibt_status_t ret;
srq_sizes.srq_sgl_sz = 1;
- srq_sizes.srq_wr_sz = ibd_rc_num_srq;
+ srq_sizes.srq_wr_sz = state->id_rc_num_srq;
ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
if (ret != IBT_SUCCESS) {
@@ -1443,7 +1336,7 @@
#ifdef DEBUG
- if (rxcnt < ibd_rc_rx_rwqe_thresh) {
+ if (rxcnt < state->id_rc_rx_rwqe_thresh) {
state->rc_rwqe_short++;
}
#endif
@@ -1451,8 +1344,8 @@
/*
* Possibly replenish the Rx pool if needed.
*/
- if ((rxcnt >= ibd_rc_rx_rwqe_thresh) &&
- (wc->wc_bytes_xfer > ibd_rc_rx_copy_thresh)) {
+ if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
+ (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
atomic_inc_64(&state->rc_rcv_trans_pkt);
@@ -1758,7 +1651,7 @@
size_t mem_size;
int i;
- num_swqe = ibd_rc_num_swqe - 1;
+ num_swqe = state->id_rc_num_swqe - 1;
/*
* Allocate one big chunk for all Tx large copy bufs
@@ -1814,7 +1707,7 @@
{
uint32_t num_swqe;
- num_swqe = ibd_rc_num_swqe - 1;
+ num_swqe = state->id_rc_num_swqe - 1;
if (ibt_deregister_mr(state->id_hca_hdl,
state->rc_tx_mr_hdl) != IBT_SUCCESS) {
@@ -1843,7 +1736,7 @@
/*
* Allocate one big chunk for all regular tx copy bufs
*/
- mem_attr.mr_len = chan->scq_size * ibd_rc_tx_copy_thresh;
+ mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh;
chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP);
@@ -1857,7 +1750,7 @@
&chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) {
DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed");
ASSERT(mem_attr.mr_len ==
- chan->scq_size * ibd_rc_tx_copy_thresh);
+ chan->scq_size * state->id_rc_tx_copy_thresh);
kmem_free(chan->tx_mr_bufs, mem_attr.mr_len);
chan->tx_mr_bufs = NULL;
return (DDI_FAILURE);
@@ -1875,6 +1768,7 @@
ibd_swqe_t *swqe;
int i;
ibt_lkey_t lkey;
+ ibd_state_t *state = chan->state;
if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS)
return (DDI_FAILURE);
@@ -1896,7 +1790,7 @@
swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL;
swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)
- (chan->tx_mr_bufs + i * ibd_rc_tx_copy_thresh);
+ (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh);
swqe->w_swr.wr_trans = IBT_RC_SRV;
/* Add to list */
@@ -1916,6 +1810,7 @@
static void
ibd_rc_fini_txlist(ibd_rc_chan_t *chan)
{
+ ibd_state_t *state = chan->state;
if (chan->tx_mr_hdl != NULL) {
if (ibt_deregister_mr(chan->state->id_hca_hdl,
chan->tx_mr_hdl) != IBT_SUCCESS) {
@@ -1927,7 +1822,7 @@
if (chan->tx_mr_bufs != NULL) {
kmem_free(chan->tx_mr_bufs, chan->scq_size *
- ibd_rc_tx_copy_thresh);
+ state->id_rc_tx_copy_thresh);
chan->tx_mr_bufs = NULL;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibp.conf Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,25 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+# Configuration file for the ibp driver.
+#
--- a/usr/src/uts/common/io/ib/ibnex/ib.conf Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ib.conf Wed Apr 14 10:26:18 2010 -0700
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,8 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
#
# Configuration file for the IB nexus driver
#
@@ -75,6 +72,6 @@
# hca-svc-list="nfs1", "nfs2";
#
#
-port-svc-list="";
-vppa-svc-list="ipib";
+port-svc-list="ipib";
+vppa-svc-list="";
hca-svc-list="";
--- a/usr/src/uts/common/io/ib/ibnex/ibnex.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -61,9 +60,9 @@
static int ibnex_getinfo(dev_info_t *, ddi_info_cmd_t,
void *, void **);
static int ibnex_detach(dev_info_t *, ddi_detach_cmd_t);
-static int ibnex_busctl(dev_info_t *,
+int ibnex_busctl(dev_info_t *,
dev_info_t *, ddi_ctl_enum_t, void *, void *);
-static int ibnex_map_fault(dev_info_t *,
+int ibnex_map_fault(dev_info_t *,
dev_info_t *, struct hat *, struct seg *,
caddr_t, struct devpage *, pfn_t, uint_t, uint_t);
static int ibnex_init_child(dev_info_t *);
@@ -75,6 +74,8 @@
static void ibnex_delete_port_node_data(ibnex_node_data_t *);
int ibnex_get_dip_from_guid(ib_guid_t, int,
ib_pkey_t, dev_info_t **);
+int ibnex_get_node_and_dip_from_guid(ib_guid_t, int,
+ ib_pkey_t, ibnex_node_data_t **, dev_info_t **);
static ibnex_node_data_t *ibnex_is_node_data_present(ibnex_node_type_t,
void *, int, ib_pkey_t);
static ibnex_node_data_t *ibnex_init_child_nodedata(ibnex_node_type_t, void *,
@@ -100,17 +101,15 @@
ddi_bus_config_op_t, void *, dev_info_t **);
static int ibnex_bus_unconfig(dev_info_t *,
uint_t, ddi_bus_config_op_t, void *);
-static dev_info_t *ibnex_config_port_node(dev_info_t *, char *);
-static dev_info_t *ibnex_config_obp_args(dev_info_t *, char *);
-static int ibnex_get_pkey_commsvc_index_portnum(
+dev_info_t *ibnex_config_port_node(dev_info_t *, char *);
+int ibnex_get_pkey_commsvc_index_portnum(
char *, int *, ib_pkey_t *, uint8_t *);
-static void ibnex_config_all_children(dev_info_t *);
+void ibnex_config_all_children(dev_info_t *);
static int ibnex_devname_to_portnum(char *, uint8_t *);
-static void ibnex_create_vppa_nodes(
+void ibnex_create_vppa_nodes(dev_info_t *, ibdm_port_attr_t *);
+void ibnex_create_port_nodes(
dev_info_t *, ibdm_port_attr_t *);
-static void ibnex_create_port_nodes(
- dev_info_t *, ibdm_port_attr_t *);
-static void ibnex_create_hcasvc_nodes(
+void ibnex_create_hcasvc_nodes(
dev_info_t *, ibdm_port_attr_t *);
static int ibnex_config_root_iocnode(dev_info_t *, char *);
static int ibnex_devname2port(char *, int *);
@@ -127,14 +126,12 @@
static int ibnex_create_ioc_compatible_prop(
dev_info_t *, ib_dm_ioc_ctrl_profile_t *);
uint64_t ibnex_str2hex(char *, int, int *);
-static int ibnex_str2int(char *, int, int *);
+int ibnex_str2int(char *, int, int *);
static int ibnex_create_ioc_portgid_prop(
dev_info_t *, ibdm_ioc_info_t *);
static void ibnex_wakeup_reprobe_ioc(ibnex_node_data_t *, int);
static void ibnex_wakeup_reprobe_all();
ibt_status_t ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *);
-static int ibnex_prom_devname_to_pkey_n_portnum(
- char *, ib_pkey_t *, uint8_t *);
void ibnex_pseudo_initnodes(void);
static char *ibnex_lookup_named_prop(ddi_prop_t *, char *);
static void ibnex_pseudo_node_cleanup(void);
@@ -161,12 +158,12 @@
int ibnex_pseudo_create_all_pi(ibnex_node_data_t *);
static int ibnex_pseudo_create_pi_pdip(ibnex_node_data_t *,
dev_info_t *);
-static int ibnex_pseudo_config_one(
+int ibnex_pseudo_config_one(
ibnex_node_data_t *, char *, dev_info_t *);
-static int ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
+int ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
char *, char *);
static void ibnex_config_pseudo_all(dev_info_t *);
-static int ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
+int ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
ddi_bus_config_op_t, void *, dev_info_t **, int *);
static int ibnex_is_merge_node(dev_info_t *);
static void ibnex_hw_in_dev_tree(char *);
@@ -176,35 +173,9 @@
static int ibnex_ioc_pi_reachable(ibdm_ioc_info_t *,
dev_info_t *);
-/*
- * The bus_ops structure defines the capabilities of HCA nexus driver.
- */
-struct bus_ops ibnex_ci_busops = {
- BUSO_REV,
- nullbusmap, /* bus_map */
- NULL, /* bus_get_intrspec */
- NULL, /* bus_add_intrspec */
- NULL, /* bus_remove_intrspec */
- ibnex_map_fault, /* Map Fault */
- ddi_no_dma_map, /* DMA related entry points */
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- ibnex_busctl, /* bus_ctl */
- ddi_bus_prop_op, /* bus_prop_op */
- NULL, /* bus_get_eventcookie */
- NULL, /* bus_add_eventcall */
- NULL, /* bus_remove_eventcall */
- NULL, /* bus_post_event */
- NULL,
- ibnex_bus_config, /* bus config */
- ibnex_bus_unconfig /* bus unconfig */
-};
-
+extern void ibnex_handle_hca_attach(void *);
+
+extern struct bus_ops ibnex_ci_busops;
/*
* Prototype declarations for the VHCI options
*/
@@ -491,12 +462,27 @@
}
mutex_exit(&ibnex.ibnex_mutex);
+ /*
+ * Create an IB nexus taskq
+ */
+
+ ibnex.ibnex_taskq_id = ddi_taskq_create(dip,
+ "ibnex-enum-taskq", 1, TASKQ_DEFAULTPRI, 0);
+ if (ibnex.ibnex_taskq_id == NULL) {
+ IBTF_DPRINTF_L2("ibnex",
+ "\tattach: ddi_taskq_create() failed");
+ return (DDI_FAILURE);
+
+ }
+
/* Register with MPxIO framework */
if (mdi_vhci_register(MDI_HCI_CLASS_IB, dip, &ibnex_vhci_ops, 0)
!= MDI_SUCCESS) {
IBTF_DPRINTF_L2("ibnex",
"\tattach: mdi_vhci_register() failed");
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
return (DDI_FAILURE);
}
@@ -510,6 +496,8 @@
DDI_NT_IB_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
IBTF_DPRINTF_L2("ibnex",
"\tattach: failed to create fabric minornode");
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -523,6 +511,8 @@
IBTF_DPRINTF_L2("ibnex",
"\tattach: failed to create devctl minornode");
(void) ddi_remove_minor_node(dip, NULL);
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -536,6 +526,8 @@
IBTF_DPRINTF_L2("ibnex",
"_attach: create pm-want-child-notification failed");
(void) ddi_remove_minor_node(dip, NULL);
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -549,9 +541,11 @@
*/
if (ndi_event_alloc_hdl(dip, 0, &ibnex.ibnex_ndi_event_hdl,
NDI_SLEEP) != NDI_SUCCESS) {
- (void) ddi_remove_minor_node(dip, NULL);
IBTF_DPRINTF_L2("ibnex",
"_attach: ndi_event_alloc_hdl failed");
+ (void) ddi_remove_minor_node(dip, NULL);
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -561,6 +555,8 @@
(void) ndi_event_free_hdl(ibnex.ibnex_ndi_event_hdl);
IBTF_DPRINTF_L2("ibnex",
"_attach: ndi_event_bind_set failed");
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -577,6 +573,8 @@
ibnex.ibnex_ndi_event_hdl = NULL;
IBTF_DPRINTF_L2("ibnex", "_attach: ibnex_comm_svc_init"
" failed %s", ibnex_properties[i].name);
+ (void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
(void) mdi_vhci_unregister(dip, 0);
return (DDI_FAILURE);
}
@@ -676,6 +674,12 @@
ibnex.ibnex_dip = NULL;
mutex_exit(&ibnex.ibnex_mutex);
(void) mdi_vhci_unregister(dip, 0);
+
+ if (ibnex.ibnex_taskq_id != NULL) {
+ ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+ ibnex.ibnex_taskq_id = NULL;
+ }
+
return (DDI_SUCCESS);
}
@@ -884,7 +888,7 @@
* such calls.
*/
/*ARGSUSED*/
-static int
+int
ibnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat,
struct seg *seg, caddr_t addr, struct devpage *dp, pfn_t pfn,
uint_t prot, uint_t lock)
@@ -898,7 +902,7 @@
* bus_ctl bus_ops entry point
*/
/*ARGSUSED*/
-static int
+int
ibnex_busctl(dev_info_t *dip, dev_info_t *rdip,
ddi_ctl_enum_t ctlop, void *arg, void *result)
{
@@ -1120,175 +1124,68 @@
* BUS_CONFIG_DRIVER:
* Enumerate all the instances of a particular driver.
*/
+
static int
ibnex_bus_config(dev_info_t *parent, uint_t flag,
ddi_bus_config_op_t op, void *devname, dev_info_t **child)
{
int ret = IBNEX_SUCCESS, len, circ, need_bus_config;
char *device_name, *cname = NULL, *caddr = NULL;
- char *device_name1;
- char *srvname, nameaddr[MAXNAMELEN];
- dev_info_t *cdip, *pdip = NULL;
+ dev_info_t *cdip;
ibnex_node_data_t *node_data;
- ibnex_port_node_t *port_node;
- int use_mdi_devi_locking = 0;
-
- if (parent != ibnex.ibnex_dip) {
- /*
- * This must be an HCA.In a normal case HCA is setup as a phci.
- * If an HCA is in maintenance mode, its phci is not set up
- * but the driver is attached to update the firmware. In this
- * case, do not configure the MPxIO clients.
- */
- if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) {
- if (op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER)
- return (NDI_SUCCESS);
- else
- return (NDI_FAILURE);
- }
-
- /* Set use_mdi_devi_locking appropriately */
- if ((op != BUS_CONFIG_ONE) || (op == BUS_CONFIG_ONE &&
- strncmp((char *)devname, IBNEX_IBPORT_CNAME, 6) != 0)) {
- IBTF_DPRINTF_L4("ibnex",
- "\tbus_config: using mdi_devi_enter");
- use_mdi_devi_locking = 1;
- }
- }
-
- if (use_mdi_devi_locking)
- mdi_devi_enter(parent, &circ);
- else
- ndi_devi_enter(parent, &circ);
switch (op) {
case BUS_CONFIG_ONE:
IBTF_DPRINTF_L4("ibnex", "\tbus_config: CONFIG_ONE, "
"parent %p", parent);
+ ndi_devi_enter(parent, &circ);
+
len = strlen((char *)devname) + 1;
device_name = i_ddi_strdup(devname, KM_SLEEP);
i_ddi_parse_name(device_name, &cname, &caddr, NULL);
if (caddr == NULL || (strlen(caddr) == 0)) {
kmem_free(device_name, len);
- if (use_mdi_devi_locking)
- mdi_devi_exit(parent, circ);
- else
- ndi_devi_exit(parent, circ);
+ ndi_devi_exit(parent, circ);
return (NDI_FAILURE);
}
- /*
- * i_ddi_parse_name() strips of the address portion
- * of the device name. Recreate device name for
- * ndi_devi_findchild
- */
- device_name1 = i_ddi_strdup(devname, KM_SLEEP);
-
- IBTF_DPRINTF_L4("ibnex",
- "\tbus_config: cname %s addr %s", cname, caddr);
-
- cdip = ndi_devi_findchild(parent, device_name1);
+ cdip = ndi_devi_findchild(parent, devname);
if (cdip)
node_data = ddi_get_parent_data(cdip);
- kmem_free(device_name1, len);
+
+ ndi_devi_exit(parent, circ);
+
if (cdip == NULL || (node_data != NULL &&
node_data->node_dip == NULL)) {
/* Node is not present */
if (strncmp(cname, IBNEX_IOC_CNAME, 3) == 0) {
- if (use_mdi_devi_locking)
- mdi_devi_exit(parent, circ);
- else
- ndi_devi_exit(parent, circ);
-
ret = ibnex_ioc_bus_config_one(&parent, flag,
op, devname, child, &need_bus_config);
if (!need_bus_config) {
kmem_free(device_name, len);
return (ret);
}
-
- if (use_mdi_devi_locking)
- mdi_devi_enter(parent, &circ);
- else
- ndi_devi_enter(parent, &circ);
- } else if ((strncmp(cname,
- IBNEX_IBPORT_CNAME, 6) == 0) &&
- (parent != ibnex.ibnex_dip)) { /* parent is HCA */
- cdip = ibnex_config_port_node(parent, devname);
- if (cdip)
- ret = IBNEX_SUCCESS;
- else
- ret = IBNEX_FAILURE;
} else {
/*
- * if not IOC or PORT device then always
- * assume a Pseudo child
- *
- * if IB Nexus is the parent, call MDI.
- * else if HCA is the parent, enumerate
- * the Pseudo node.
+ * if IB Nexus is the parent, call MDI. Bus
+ * config with HCA as the parent would have
+ * enumerated the Pseudo node.
*/
ret = IBNEX_SUCCESS;
ibnex_pseudo_initnodes();
- if (parent == ibnex.ibnex_dip) {
- if (use_mdi_devi_locking)
- mdi_devi_exit(parent, circ);
- else
- ndi_devi_exit(parent, circ);
-
- mutex_enter(&ibnex.ibnex_mutex);
- ret = ibnex_pseudo_mdi_config_one(
- flag, devname, child, cname,
- caddr);
- mutex_exit(&ibnex.ibnex_mutex);
- kmem_free(device_name, len);
- return (ret);
- }
mutex_enter(&ibnex.ibnex_mutex);
- ret = ibnex_pseudo_config_one(NULL,
- caddr, parent);
+ ret = ibnex_pseudo_mdi_config_one(flag, devname,
+ child, cname, caddr);
mutex_exit(&ibnex.ibnex_mutex);
+ kmem_free(device_name, len);
+ return (ret);
}
}
-
- if (strncmp(cname, IBNEX_IBPORT_CNAME, 6) == 0) {
- /* Allows enumeration under PHCI */
- flag |= NDI_MDI_FALLBACK;
- }
kmem_free(device_name, len);
break;
- case BUS_CONFIG_OBP_ARGS:
- cdip = ibnex_config_obp_args(parent, devname);
- if (cdip) {
- /*
- * Boot case.
- * Special handling because the "devname"
- * format for the enumerated device is
- * different.
- */
- node_data = ddi_get_parent_data(cdip);
- port_node = &node_data->node_data.port_node;
- if (node_data->node_type ==
- IBNEX_VPPA_COMMSVC_NODE) {
- srvname =
- ibnex.ibnex_vppa_comm_svc_names[
- port_node->port_commsvc_idx];
- (void) snprintf(nameaddr, MAXNAMELEN,
- "ibport@%x,%x,%s",
- port_node->port_num,
- port_node->port_pkey, srvname);
- }
- devname = (void *)nameaddr;
- } else {
- IBTF_DPRINTF_L2("ibnex",
- "\tbus_config: CONFIG_OBP_ARGS : invalid state!!");
-
- ret = IBNEX_FAILURE;
- }
- break;
case BUS_CONFIG_ALL:
/*FALLTHRU*/
case BUS_CONFIG_DRIVER:
@@ -1300,15 +1197,6 @@
", parent %p", parent);
/*
- * No locks to be held while calling mdi_vhci_bus_config()
- * ibnex_config_all_children() holds appropriate locks.
- */
- if (use_mdi_devi_locking)
- mdi_devi_exit(parent, circ);
- else
- ndi_devi_exit(parent, circ);
-
- /*
* Drive CONFIG requests for IB Nexus parent through
* MDI. This is needed to load the HCA drivers in x86 SRP
* boot case.
@@ -1316,77 +1204,55 @@
* CONFIG Requests with HCA parent will probe devices using
* ibdm and configure all children.
*/
- if (parent == ibnex.ibnex_dip) {
- ibdm_ioc_info_t *ioc_list, *new_ioc_list;
-
- mutex_enter(&ibnex.ibnex_mutex);
- while (ibnex.ibnex_ioc_list_state !=
- IBNEX_IOC_LIST_READY) {
- cv_wait(&ibnex.ibnex_ioc_list_cv,
- &ibnex.ibnex_mutex);
- }
- ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_RENEW;
- mutex_exit(&ibnex.ibnex_mutex);
- /* Enumerate all the IOC's */
- ibdm_ibnex_port_settle_wait(0,
- ibnex_port_settling_time);
-
- new_ioc_list = ibdm_ibnex_get_ioc_list(
- IBDM_IBNEX_NORMAL_PROBE);
+ ibdm_ioc_info_t *ioc_list, *new_ioc_list;
+
+ mutex_enter(&ibnex.ibnex_mutex);
+ while (ibnex.ibnex_ioc_list_state !=
+ IBNEX_IOC_LIST_READY) {
+ cv_wait(&ibnex.ibnex_ioc_list_cv,
+ &ibnex.ibnex_mutex);
+ }
+ ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_RENEW;
+ mutex_exit(&ibnex.ibnex_mutex);
+ /* Enumerate all the IOC's */
+ ibdm_ibnex_port_settle_wait(0, ibnex_port_settling_time);
+
+ new_ioc_list = ibdm_ibnex_get_ioc_list(
+ IBDM_IBNEX_NORMAL_PROBE);
+ IBTF_DPRINTF_L4("ibnex",
+ "\tbus_config: alloc ioc_list %p", new_ioc_list);
+ /*
+ * Optimize the calls for each BUS_CONFIG_ALL request
+ * to the IB Nexus dip. This is currently done for
+ * each PDIP.
+ */
+ mutex_enter(&ibnex.ibnex_mutex);
+ ioc_list = ibnex.ibnex_ioc_list;
+ ibnex.ibnex_ioc_list = new_ioc_list;
+ ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
+ cv_broadcast(&ibnex.ibnex_ioc_list_cv);
+ mutex_exit(&ibnex.ibnex_mutex);
+
+ if (ioc_list) {
IBTF_DPRINTF_L4("ibnex",
- "\tbus_config: alloc ioc_list %p", new_ioc_list);
- /*
- * Optimize the calls for each BUS_CONFIG_ALL request
- * to the IB Nexus dip. This is currently done for
- * each PDIP.
- */
- mutex_enter(&ibnex.ibnex_mutex);
- ioc_list = ibnex.ibnex_ioc_list;
- ibnex.ibnex_ioc_list = new_ioc_list;
- ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
- cv_broadcast(&ibnex.ibnex_ioc_list_cv);
- mutex_exit(&ibnex.ibnex_mutex);
-
- if (ioc_list) {
- IBTF_DPRINTF_L4("ibnex",
- "\tbus_config: freeing ioc_list %p",
- ioc_list);
- ibdm_ibnex_free_ioc_list(ioc_list);
- }
-
-
- ret = mdi_vhci_bus_config(parent,
- flag, op, devname, child, NULL);
- return (ret);
- } else {
- ibnex_config_all_children(parent);
-
- if (use_mdi_devi_locking)
- mdi_devi_enter(parent, &circ);
- else
- ndi_devi_enter(parent, &circ);
+ "\tbus_config: freeing ioc_list %p",
+ ioc_list);
+ ibdm_ibnex_free_ioc_list(ioc_list);
}
- break;
+
+
+ ret = mdi_vhci_bus_config(parent,
+ flag, op, devname, child, NULL);
+ return (ret);
default:
IBTF_DPRINTF_L4("ibnex", "\tbus_config: error");
ret = IBNEX_FAILURE;
break;
}
- if (use_mdi_devi_locking)
- mdi_devi_exit(parent, circ);
- else
- ndi_devi_exit(parent, circ);
-
if (ret == IBNEX_SUCCESS) {
- if (op == BUS_CONFIG_OBP_ARGS)
- op = BUS_CONFIG_ONE;
-
- if (pdip == NULL)
- pdip = parent;
-
ret = ndi_busop_bus_config(
- pdip, flag, op, devname, child, 0);
+ parent, flag, op, devname, child, 0);
IBTF_DPRINTF_L4("ibnex", "\tbus_config:"
"ndi_busop_bus_config : retval %d", ret);
return (ret);
@@ -1506,7 +1372,7 @@
* Bind drivers for all the newly created device nodes
* Support Pseudo nodes enumerated using their .conf file
*/
-static void
+void
ibnex_config_all_children(dev_info_t *parent)
{
int ii;
@@ -1517,6 +1383,7 @@
IBTF_DPRINTF_L4("ibnex", "\tconfig_all_children: Begin");
+
/*
* Enumerate children of this HCA, port nodes,
* VPPA & HCA_SVC nodes. Use ndi_devi_enter() for
@@ -1535,8 +1402,7 @@
for (ii = 0; ii < hca_list->hl_nports; ii++) {
ibnex_create_port_nodes(
parent, &hca_list->hl_port_attr[ii]);
- ibnex_create_vppa_nodes(
- parent, &hca_list->hl_port_attr[ii]);
+ ibnex_create_vppa_nodes(parent, &hca_list->hl_port_attr[ii]);
}
ibdm_ibnex_free_hca_list(hca_list);
ndi_devi_exit(parent, circ);
@@ -1577,7 +1443,7 @@
* Creates a device node per each communication service defined
* in the "port-commsvc-list" property per HCA port
*/
-static void
+void
ibnex_create_port_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
{
int idx;
@@ -1588,7 +1454,7 @@
for (idx = 0; idx < ibnex.ibnex_num_comm_svcs; idx++) {
rval = ibnex_get_dip_from_guid(port_attr->pa_port_guid,
idx, 0, &dip);
- if (rval != IBNEX_SUCCESS) {
+ if (rval != IBNEX_SUCCESS || dip == NULL) {
(void) ibnex_commsvc_initnode(parent, port_attr, idx,
IBNEX_PORT_COMMSVC_NODE, 0, &rval,
IBNEX_DEVFS_ENUMERATE);
@@ -1604,8 +1470,9 @@
* in the "vppa-commsvc-list" property and per each PKEY that
* this particular port supports and per HCA port
*/
-static void
-ibnex_create_vppa_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
+void
+ibnex_create_vppa_nodes(
+ dev_info_t *parent, ibdm_port_attr_t *port_attr)
{
int idx, ii;
int rval;
@@ -1622,6 +1489,10 @@
return;
}
for (idx = 0; idx < ibnex.ibnex_nvppa_comm_svcs; idx++) {
+ if (strcmp("ipib", ibnex.ibnex_vppa_comm_svc_names[idx]) == 0) {
+ IBTF_DPRINTF_L2("ibnex", "Skipping IBD devices...");
+ continue;
+ }
for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
pkey = port_attr->pa_pkey_tbl[ii].pt_pkey;
@@ -1646,7 +1517,7 @@
* Creates a device node per each communication service defined
* in the "port-commsvc-list" property per HCA port
*/
-static void
+void
ibnex_create_hcasvc_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
{
int idx;
@@ -1657,7 +1528,7 @@
for (idx = 0; idx < ibnex.ibnex_nhcasvc_comm_svcs; idx++) {
rval = ibnex_get_dip_from_guid(port_attr->pa_port_guid,
idx, 0, &dip);
- if (rval != IBNEX_SUCCESS) {
+ if (rval != IBNEX_SUCCESS || dip == NULL) {
(void) ibnex_commsvc_initnode(parent, port_attr, idx,
IBNEX_HCASVC_COMMSVC_NODE, 0, &rval,
IBNEX_DEVFS_ENUMERATE);
@@ -1687,74 +1558,27 @@
DDI_SUCCESS)
return (DDI_FAILURE);
- /*
- * We can come into this routine with dip as ibnexus dip or hca dip.
- * When the dip is that of ib nexus we need to clean up the IOC and
- * pseudo nodes. When the dip is that of an HCA (not IB nexus dip)
- * cleanup the port nodes.
- */
if ((op == BUS_UNCONFIG_ALL || op == BUS_UNCONFIG_DRIVER) &&
(flag & (NDI_UNCONFIG | NDI_DETACH_DRIVER))) {
mutex_enter(&ibnex.ibnex_mutex);
- if (parent != ibnex.ibnex_dip) {
- if (major == -1) {
- /*
- * HCA dip. When major number is -1 HCA is
- * going away cleanup all the port nodes.
- */
- for (ndp = ibnex.ibnex_port_node_head;
- ndp; ndp = ndp->node_next) {
- ibnex_port_node_t *port_node;
-
- port_node = &ndp->node_data.port_node;
- if (port_node->port_pdip == parent) {
- port_node->port_pdip = NULL;
- ndp->node_dip = NULL;
- ndp->node_state =
- IBNEX_CFGADM_UNCONFIGURED;
- }
- }
- } else {
- /*
- * HCA dip. Cleanup only the port nodes that
- * match the major number.
- */
- for (ndp = ibnex.ibnex_port_node_head;
- ndp; ndp = ndp->node_next) {
- ibnex_port_node_t *port_node;
-
- port_node = &ndp->node_data.port_node;
- dip = ndp->node_dip;
- if (dip && (ddi_driver_major(dip) ==
- major) && port_node->port_pdip ==
- parent) {
- port_node->port_pdip = NULL;
- ndp->node_dip = NULL;
- ndp->node_state =
- IBNEX_CFGADM_UNCONFIGURED;
- }
- }
+ /*
+ * IB dip. here we handle IOC and pseudo nodes which
+ * are the children of IB nexus. Cleanup only the nodes
+ * with matching major number. We also need to cleanup
+ * the PathInfo links to the PHCI here.
+ */
+ for (ndp = ibnex.ibnex_ioc_node_head;
+ ndp; ndp = ndp->node_next) {
+ dip = ndp->node_dip;
+ if (dip && (ddi_driver_major(dip) == major)) {
+ (void) ibnex_offline_childdip(dip);
}
- } else {
- /*
- * IB dip. here we handle IOC and pseudo nodes which
- * are the children of IB nexus. Cleanup only the nodes
- * with matching major number. We also need to cleanup
- * the PathInfo links to the PHCI here.
- */
- for (ndp = ibnex.ibnex_ioc_node_head;
- ndp; ndp = ndp->node_next) {
- dip = ndp->node_dip;
- if (dip && (ddi_driver_major(dip) == major)) {
- (void) ibnex_offline_childdip(dip);
- }
- }
- for (ndp = ibnex.ibnex_pseudo_node_head;
- ndp; ndp = ndp->node_next) {
- dip = ndp->node_dip;
- if (dip && (ddi_driver_major(dip) == major)) {
- (void) ibnex_offline_childdip(dip);
- }
+ }
+ for (ndp = ibnex.ibnex_pseudo_node_head;
+ ndp; ndp = ndp->node_next) {
+ dip = ndp->node_dip;
+ if (dip && (ddi_driver_major(dip) == major)) {
+ (void) ibnex_offline_childdip(dip);
}
}
mutex_exit(&ibnex.ibnex_mutex);
@@ -1775,7 +1599,7 @@
* Returns "dev_info_t" of the "child" node just created
* NULL when failed to enumerate the child node
*/
-static dev_info_t *
+dev_info_t *
ibnex_config_port_node(dev_info_t *parent, char *devname)
{
int ii, index;
@@ -1826,8 +1650,8 @@
port_guid = port_attr->pa_port_guid;
mutex_enter(&ibnex.ibnex_mutex);
- if ((rval = ibnex_get_dip_from_guid(port_guid, index, pkey,
- &cdip)) == IBNEX_SUCCESS) {
+ rval = ibnex_get_dip_from_guid(port_guid, index, pkey, &cdip);
+ if ((rval == IBNEX_SUCCESS) && cdip != NULL) {
IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
mutex_exit(&ibnex.ibnex_mutex);
if (port_num != 0)
@@ -1878,152 +1702,12 @@
/*
- * ibnex_config_obp_args()
- * Configures a particular port node for a IP over IB communication
- * service.
- * The format of the input string "devname" is
- * port=x,pkey=y,protocol=ip,<wanboot options>
- * Thr format of the node name created here is
- * ibport@<Port#>,<pkey>,<service name>
- * where pkey = 0 for port communication service nodes
- * Returns "dev_info_t" of the "child" node just created
- * NULL when failed to enumerate the child node
- *
- */
-static dev_info_t *
-ibnex_config_obp_args(dev_info_t *parent, char *devname)
-{
- int ii, index;
- int rval, iter = 0;
- char *temp;
- uint8_t port_num;
- ib_guid_t hca_guid, port_guid;
- ib_pkey_t pkey;
- dev_info_t *cdip;
- boolean_t displayed = B_FALSE;
- ibdm_port_attr_t *port_attr;
-
- IBTF_DPRINTF_L4("ibnex", "\tconfig_obp_args: %s", devname);
-
- /* Is this OBP node for IPoIB ? */
- temp = devname;
- do {
- temp = strstr(temp, ",protocol=ip");
- if (temp == NULL)
- break;
-
- if (strlen(devname) > (int)((temp - devname) + 12)) {
- if (temp[12] == ',')
- break;
- } else {
- break;
- }
- temp++;
- } while (temp);
-
- if (temp == NULL)
- return (NULL);
- if (ibnex_prom_devname_to_pkey_n_portnum(
- devname, &pkey, &port_num) != IBNEX_SUCCESS) {
- return (NULL);
- }
- for (index = 0; index < ibnex.ibnex_nvppa_comm_svcs; index++) {
- if (strcmp(ibnex.ibnex_vppa_comm_svc_names[index],
- "ipib") == 0) {
- break;
- }
- }
-
- hca_guid = ibtl_ibnex_hcadip2guid(parent);
- if ((port_attr = ibdm_ibnex_probe_hcaport(
- hca_guid, port_num)) == NULL) {
- IBTF_DPRINTF_L2("ibnex",
- "\tconfig_port_node: Port does not exist");
- return (NULL);
- }
-
- /* Wait until "port is up" */
- while (port_attr->pa_state != IBT_PORT_ACTIVE) {
- ibdm_ibnex_free_port_attr(port_attr);
- delay(drv_usectohz(10000));
- if ((port_attr = ibdm_ibnex_probe_hcaport(
- hca_guid, port_num)) == NULL) {
- return (NULL);
- }
- if (iter++ == 400) {
- if (displayed == B_FALSE) {
- cmn_err(CE_NOTE, "\tWaiting for Port %d "
- "initialization", port_attr->pa_port_num);
- displayed = B_TRUE;
- }
- }
- }
- IBTF_DPRINTF_L4("ibnex", "\tPort is initialized");
-
- mutex_enter(&ibnex.ibnex_mutex);
- port_guid = port_attr->pa_port_guid;
- if ((rval = ibnex_get_dip_from_guid(port_guid, index, pkey,
- &cdip)) == IBNEX_SUCCESS) {
- IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
- mutex_exit(&ibnex.ibnex_mutex);
- ibdm_ibnex_free_port_attr(port_attr);
- return (cdip);
- }
- for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
- if (pkey == port_attr->pa_pkey_tbl[ii].pt_pkey) {
- cdip = ibnex_commsvc_initnode(parent, port_attr,
- index, IBNEX_VPPA_COMMSVC_NODE, pkey, &rval,
- IBNEX_CFGADM_ENUMERATE);
- IBTF_DPRINTF_L5("ibnex",
- "\t ibnex_commsvc_initnode rval %x", rval);
- break;
- }
- }
- mutex_exit(&ibnex.ibnex_mutex);
-
- ibdm_ibnex_free_port_attr(port_attr);
- return (cdip);
-}
-
-
-/*
- * ibnex_prom_devname_to_pkey_n_portnum()
- * Parses the device node name and extracts "PKEY" and "port#"
- * Returns IBNEX_SUCCESS/IBNEX_FAILURE
- */
-static int
-ibnex_prom_devname_to_pkey_n_portnum(
- char *devname, ib_pkey_t *pkey, uint8_t *port)
-{
- int ret = IBNEX_SUCCESS;
- char *tmp, *tmp1;
-
- if ((tmp = strstr(devname, "port=")) != NULL) {
- if ((tmp = strchr(++tmp, '=')) != NULL)
- if ((tmp1 = strchr(++tmp, ',')) != NULL)
- *port = ibnex_str2int(tmp, (tmp1 - tmp), &ret);
- } else
- ret = IBNEX_FAILURE;
-
- if ((ret == IBNEX_SUCCESS) &&
- (tmp = strstr(devname, "pkey=")) != NULL) {
- if ((tmp = strchr(++tmp, '=')) != NULL)
- if ((tmp1 = strchr(++tmp, ',')) != NULL)
- *pkey = ibnex_str2hex(tmp, (tmp1 - tmp), &ret);
- } else
- ret = IBNEX_FAILURE;
-
- return (ret);
-}
-
-
-/*
* ibnex_get_pkey_commsvc_index_portnum()
* Parses the device node name and extracts PKEY, communication
* service index & Port #.
* Returns IBNEX_SUCCESS/IBNEX_FAILURE
*/
-static int
+int
ibnex_get_pkey_commsvc_index_portnum(char *device_name, int *index,
ib_pkey_t *pkey, uint8_t *port_num)
{
@@ -2238,7 +1922,7 @@
/*
* ibnex_pseudo_config_one()
*/
-static int
+int
ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr,
dev_info_t *pdip)
{
@@ -2345,7 +2029,7 @@
* node_state, node_dip, etc. These checks and initializations
* are done when BUS_CONFIG is called with PHCI as the parent.
*/
-static int
+int
ibnex_pseudo_mdi_config_one(int flag, void *devname, dev_info_t **child,
char *cname, char *caddr)
{
@@ -2948,6 +2632,7 @@
ibdm_ioc_info_t *ioc_list, *ioc;
ibnex_node_data_t *node_data;
dev_info_t *phci;
+ ib_guid_t *guid;
IBTF_DPRINTF_L4("ibnex", "\tdm_callback: attr %p event %x", arg, flag);
@@ -2963,6 +2648,15 @@
"create minor node for port w/ guid %s", hca_guid);
}
+ guid = kmem_alloc(sizeof (ib_guid_t), KM_SLEEP);
+ *guid = *(ib_guid_t *)arg;
+ if (ddi_taskq_dispatch(ibnex.ibnex_taskq_id,
+ ibnex_handle_hca_attach, guid, DDI_NOSLEEP)
+ != DDI_SUCCESS) {
+ IBTF_DPRINTF_L4("ibnex", "\tdm_callback: failed to "
+ "dispatch HCA add event for guid %s", hca_guid);
+ }
+
break;
case IBDM_EVENT_HCA_REMOVED:
@@ -3002,6 +2696,56 @@
/*
+ * ibnex_get_node_and_dip_from_guid()
+ *
+ * Searches the linked list of the port nodes and returns the node data
+ * and dip matching the requested Port / Node guid, index and pkey.
+ * Returns IBNEX_FAILURE (*nodep and *dip set to NULL) if not found
+ */
+int
+ibnex_get_node_and_dip_from_guid(ib_guid_t guid, int index, ib_pkey_t pkey,
+ ibnex_node_data_t **nodep, dev_info_t **dip)
+{
+ int node_index;
+ ib_guid_t node_guid;
+ ib_pkey_t node_pkey;
+ ibnex_node_data_t *node_data;
+
+ IBTF_DPRINTF_L4("ibnex",
+ "\tget_node_and_dip_from_guid: guid = %llX", guid);
+
+ ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); /* caller holds ibnex_mutex */
+ /* Search the port node list for a guid, commsvc index and pkey match */
+ node_data = ibnex.ibnex_port_node_head;
+ while (node_data) {
+ node_guid = node_data->node_data.port_node.port_guid;
+ node_index = node_data->node_data.port_node.port_commsvc_idx;
+ node_pkey = node_data->node_data.port_node.port_pkey;
+ if ((node_guid == guid) && (index == node_index) &&
+ (node_pkey == pkey)) {
+ break;
+ }
+ node_data = node_data->node_next;
+ }
+
+ /* match found with a valid dip: hand back both the node and its dip */
+ if (node_data && node_data->node_dip) {
+ *nodep = node_data;
+ *dip = node_data->node_dip;
+ return (IBNEX_SUCCESS);
+ } else if (node_data && !node_data->node_dip) { /* match, but dip is NULL */
+ *nodep = node_data;
+ *dip = NULL;
+ return (IBNEX_SUCCESS); /* success even though *dip is NULL */
+ }
+
+ /* no match found: both out-parameters are cleared */
+ *nodep = NULL;
+ *dip = NULL;
+ return (IBNEX_FAILURE);
+}
+
+/*
* ibnex_get_dip_from_guid()
*
* Searches the linked list of the port nodes and returns the dip for
@@ -3490,7 +3234,7 @@
* integer.
* Returns IBNEX_SUCCESS/IBNEX_FAILURE
*/
-static int
+int
ibnex_str2int(char *c, int len, int *ret)
{
int intval = 0, ii;
@@ -4209,23 +3953,13 @@
* the client would have been detached by mdi_devi_offline.
*/
if (clnt_num_pi == 1) {
- for (node_data = ibnex.ibnex_ioc_node_head;
- node_data; node_data = node_data->node_next) {
- if (node_data->node_dip == cdip) {
- node_data->node_dip = NULL;
- node_data->node_state =
- IBNEX_CFGADM_UNCONFIGURED;
- return (MDI_SUCCESS);
- }
- }
- for (node_data = ibnex.ibnex_pseudo_node_head;
- node_data; node_data = node_data->node_next) {
- if (node_data->node_dip == cdip) {
- node_data->node_dip = NULL;
- node_data->node_state =
- IBNEX_CFGADM_UNCONFIGURED;
- return (MDI_SUCCESS);
- }
+ node_data = ddi_get_parent_data(cdip);
+ if (node_data == NULL)
+ return (MDI_SUCCESS);
+ if (node_data->node_dip == cdip) {
+ node_data->node_dip = NULL;
+ node_data->node_state = IBNEX_CFGADM_UNCONFIGURED;
+ return (MDI_SUCCESS);
}
}
return (MDI_SUCCESS);
@@ -4298,7 +4032,7 @@
* 1. ibdm to probe IOC
* 2. Create a pathinfo only if the IOC is reachable from the parent dip.
*/
-static int
+int
ibnex_ioc_bus_config_one(dev_info_t **pdipp, uint_t flag,
ddi_bus_config_op_t op, void *devname, dev_info_t **child,
int *need_bus_config)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex_hca.c Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,608 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/taskq.h>
+#include <sys/mdi_impldefs.h>
+#include <sys/sunmdi.h>
+#include <sys/sunpm.h>
+#include <sys/ib/mgt/ibdm/ibdm_impl.h>
+#include <sys/ib/ibnex/ibnex.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/impl/ibtl_ibnex.h>
+#include <sys/file.h>
+#include <sys/hwconf.h>
+#include <sys/fs/dv_node.h>
+
+void ibnex_handle_hca_attach(void *);
+static int ibnex_hca_bus_config_one(dev_info_t *, void *,
+ ddi_bus_config_op_t, uint_t *, dev_info_t **);
+
+static ibnex_node_data_t *ibnex_get_cdip_info(dev_info_t *, char *,
+ dev_info_t **, ibnex_node_type_t *);
+static int ibnex_prom_devname_to_pkey_n_portnum(
+ char *, ib_pkey_t *, uint8_t *);
+static dev_info_t *ibnex_config_obp_args(dev_info_t *, char *);
+
+extern int ibnex_busctl(dev_info_t *,
+ dev_info_t *, ddi_ctl_enum_t, void *, void *);
+extern int ibnex_map_fault(dev_info_t *,
+ dev_info_t *, struct hat *, struct seg *,
+ caddr_t, struct devpage *, pfn_t, uint_t, uint_t);
+static int ibnex_hca_bus_config(dev_info_t *, uint_t,
+ ddi_bus_config_op_t, void *, dev_info_t **);
+static int ibnex_hca_bus_unconfig(dev_info_t *,
+ uint_t, ddi_bus_config_op_t, void *);
+extern dev_info_t *ibnex_config_port_node(dev_info_t *, char *);
+extern dev_info_t *ibnex_config_obp_args(dev_info_t *, char *);
+extern int ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
+ ddi_bus_config_op_t, void *, dev_info_t **, int *);
+extern int ibnex_pseudo_config_one(
+ ibnex_node_data_t *, char *, dev_info_t *);
+extern void ibnex_config_all_children(dev_info_t *);
+extern void ibnex_pseudo_initnodes(void);
+
+extern int ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
+ char *, char *);
+extern int ibnex_get_dip_from_guid(ib_guid_t, int,
+ ib_pkey_t, dev_info_t **);
+extern dev_info_t *ibnex_commsvc_initnode(dev_info_t *,
+ ibdm_port_attr_t *, int, int, ib_pkey_t, int *,
+ int);
+extern uint64_t ibnex_str2hex(char *, int, int *);
+extern int ibnex_str2int(char *, int, int *);
+extern void ibnex_create_hcasvc_nodes(
+ dev_info_t *, ibdm_port_attr_t *);
+extern void ibnex_create_port_nodes(
+ dev_info_t *, ibdm_port_attr_t *);
+extern void ibnex_create_vppa_nodes(
+ dev_info_t *, ibdm_port_attr_t *);
+extern int ibnex_get_pkey_commsvc_index_portnum(
+ char *, int *, ib_pkey_t *, uint8_t *);
+
+extern ibnex_t ibnex;
+extern int ibnex_port_settling_time;
+
+/*
+ * The bus_ops structure defines the capabilities of HCA nexus driver.
+ * The initializers are positional; entries left NULL are operations this
+ * nexus does not provide.
+ * NOTE(review): the slot names added to the unlabeled NULLs follow the
+ * field order of struct bus_ops -- confirm against <sys/devops.h>.
+ */
+struct bus_ops ibnex_ci_busops = {
+ BUSO_REV,
+ nullbusmap, /* bus_map */
+ NULL, /* bus_get_intrspec */
+ NULL, /* bus_add_intrspec */
+ NULL, /* bus_remove_intrspec */
+ ibnex_map_fault, /* Map Fault */
+ ddi_no_dma_map, /* DMA related entry points */
+ NULL, /* bus_dma_allochdl */
+ NULL, /* bus_dma_freehdl */
+ NULL, /* bus_dma_bindhdl */
+ NULL, /* bus_dma_unbindhdl */
+ NULL, /* bus_dma_flush */
+ NULL, /* bus_dma_win */
+ NULL, /* bus_dma_ctl */
+ ibnex_busctl, /* bus_ctl */
+ ddi_bus_prop_op, /* bus_prop_op */
+ NULL, /* bus_get_eventcookie */
+ NULL, /* bus_add_eventcall */
+ NULL, /* bus_remove_eventcall */
+ NULL, /* bus_post_event */
+ NULL, /* bus_intr_ctl */
+ ibnex_hca_bus_config, /* bus config */
+ ibnex_hca_bus_unconfig /* bus unconfig */
+};
+
+/*
+ * ibnex_hca_bus_config()
+ *
+ * Bus config entry point for the children of an HCA node.
+ *
+ * BUS_CONFIG_ONE:
+ *	Enumerate the exact instance of the driver. Use the device node name
+ *	to locate the exact instance.
+ *	Query IBDM to find whether the hardware exists for the instance of the
+ *	driver. If exists, create a device node and return NDI_SUCCESS.
+ *
+ * BUS_CONFIG_OBP_ARGS:
+ *	Boot case. "devname" carries the OBP arguments; the matching VPPA
+ *	node is enumerated and "devname" is replaced by that node's
+ *	"ibport@<port>,<pkey>,<service>" name. The request is then passed
+ *	on to the framework as BUS_CONFIG_ONE.
+ *
+ * BUS_CONFIG_ALL:
+ *	Enumerate all the instances of all the possible children (seen before
+ *	and never seen before).
+ *
+ * BUS_CONFIG_DRIVER:
+ *	Enumerate all the instances of a particular driver.
+ *
+ * Returns the value of ndi_busop_bus_config() when the IB specific step
+ * succeeded, NDI_FAILURE otherwise. If the HCA is not set up as a phci,
+ * CONFIG_ALL/CONFIG_DRIVER return NDI_SUCCESS without doing anything and
+ * every other operation returns NDI_FAILURE.
+ */
+static int
+ibnex_hca_bus_config(dev_info_t *parent, uint_t flag,
+    ddi_bus_config_op_t op, void *devname, dev_info_t **child)
+{
+	int			ret = IBNEX_SUCCESS, circ;
+	char			*srvname, nameaddr[MAXNAMELEN];
+	dev_info_t		*cdip;
+	ibnex_node_data_t	*node_data;
+	ibnex_port_node_t	*port_node;
+
+	/*
+	 * In a normal case HCA is setup as a phci.
+	 * If an HCA is in maintenance mode, its phci is not set up
+	 * but the driver is attached to update the firmware. In this
+	 * case, do not configure the MPxIO clients.
+	 */
+	if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) {
+		if (op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER)
+			return (NDI_SUCCESS);
+		else
+			return (NDI_FAILURE);
+	}
+
+	switch (op) {
+	case BUS_CONFIG_ONE:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: CONFIG_ONE, "
+		    "parent %p", parent);
+		ret = ibnex_hca_bus_config_one(
+		    parent, devname, op, &flag, child);
+		break;
+
+	case BUS_CONFIG_OBP_ARGS:
+		mdi_devi_enter(parent, &circ);
+		cdip = ibnex_config_obp_args(parent, devname);
+		if (cdip) {
+			/*
+			 * Boot case.
+			 * Special handling because the "devname"
+			 * format for the enumerated device is
+			 * different.
+			 */
+			node_data = ddi_get_parent_data(cdip);
+			if (node_data != NULL && node_data->node_type ==
+			    IBNEX_VPPA_COMMSVC_NODE) {
+				port_node = &node_data->node_data.port_node;
+				srvname =
+				    ibnex.ibnex_vppa_comm_svc_names[
+				    port_node->port_commsvc_idx];
+				(void) snprintf(nameaddr, MAXNAMELEN,
+				    "ibport@%x,%x,%s",
+				    port_node->port_num,
+				    port_node->port_pkey, srvname);
+				/*
+				 * Switch to the generated name only once
+				 * the buffer has really been filled in.
+				 */
+				devname = (void *)nameaddr;
+			} else {
+				/*
+				 * nameaddr was never formatted; passing it
+				 * on would hand uninitialized stack data to
+				 * the framework as a device name.
+				 */
+				IBTF_DPRINTF_L2("ibnex", "\thca_bus_config: "
+				    "CONFIG_OBP_ARGS : not a VPPA node");
+				ret = IBNEX_FAILURE;
+			}
+		} else {
+			IBTF_DPRINTF_L2("ibnex", "\thca_bus_config: "
+			    "CONFIG_OBP_ARGS : invalid state!!");
+
+			ret = IBNEX_FAILURE;
+		}
+		mdi_devi_exit(parent, circ);
+		break;
+
+	case BUS_CONFIG_ALL:
+		IBTF_DPRINTF_L4("ibnex",
+		    "\thca_bus_config: CONFIG_ALL parent %p", parent);
+		ibnex_config_all_children(parent);
+		break;
+
+	case BUS_CONFIG_DRIVER:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: "
+		    "CONFIG_DRIVER parent %p", parent);
+		ibnex_config_all_children(parent);
+		break;
+
+	default:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: error");
+		ret = IBNEX_FAILURE;
+		break;
+	}
+
+	if (ret == IBNEX_SUCCESS) {
+		/* The OBP request has been turned into a plain name lookup */
+		if (op == BUS_CONFIG_OBP_ARGS)
+			op = BUS_CONFIG_ONE;
+
+		ret = ndi_busop_bus_config(
+		    parent, flag, op, devname, child, 0);
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config:"
+		    "ndi_busop_bus_config : retval %d", ret);
+		return (ret);
+	}
+
+	return (NDI_FAILURE);
+}
+
+/*
+ * ibnex_hca_bus_unconfig()
+ *
+ *	Lets the framework unconfigure the requested child(ren) of the HCA
+ *	"parent" and then, for BUS_UNCONFIG_ALL/BUS_UNCONFIG_DRIVER requests
+ *	carrying NDI_UNCONFIG, marks the affected port nodes of this HCA as
+ *	unconfigured.
+ *
+ *	"device_name" carries a major number for these operations. A major
+ *	of -1 selects every port node of the HCA; any other value selects
+ *	only the port nodes bound to that driver.
+ *
+ *	Returns DDI_SUCCESS, or DDI_FAILURE if the framework unconfigure
+ *	failed.
+ */
+static int
+ibnex_hca_bus_unconfig(dev_info_t *parent,
+    uint_t flag, ddi_bus_config_op_t op, void *device_name)
+{
+	ibnex_node_data_t	*ndp;
+	major_t			major;
+
+	if (ndi_busop_bus_unconfig(parent, flag, op, device_name) !=
+	    DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	/* Only full unconfigure requests touch the port node list */
+	if ((op != BUS_UNCONFIG_ALL && op != BUS_UNCONFIG_DRIVER) ||
+	    !(flag & NDI_UNCONFIG))
+		return (DDI_SUCCESS);
+
+	major = (major_t)(uintptr_t)device_name;
+
+	mutex_enter(&ibnex.ibnex_mutex);
+	for (ndp = ibnex.ibnex_port_node_head; ndp != NULL;
+	    ndp = ndp->node_next) {
+		ibnex_port_node_t	*port_node;
+		dev_info_t		*dip;
+
+		port_node = &ndp->node_data.port_node;
+		dip = ndp->node_dip;
+
+		/* Port nodes of other HCAs are none of our business */
+		if (port_node->port_pdip != parent)
+			continue;
+
+		/*
+		 * When major number is -1 the HCA is going away and all
+		 * of its port nodes are cleaned up. Otherwise only the
+		 * port nodes that match the major number are.
+		 */
+		if (major != -1 &&
+		    (dip == NULL || ddi_driver_major(dip) != major))
+			continue;
+
+		port_node->port_pdip = NULL;
+		ndp->node_dip = NULL;
+		ndp->node_state = IBNEX_CFGADM_UNCONFIGURED;
+	}
+	mutex_exit(&ibnex.ibnex_mutex);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * ibnex_config_obp_args()
+ *	Configures a particular port node for a IP over IB communication
+ *	service.
+ *	The format of the input string "devname" is
+ *		port=x,pkey=y,protocol=ip,<wanboot options>
+ *	The format of the node name created here is
+ *		ibport@<Port#>,<pkey>,<service name>
+ *	where pkey = 0 for port communication service nodes
+ *
+ *	Blocks, polling every 10ms, until the port reports
+ *	IBT_PORT_ACTIVE; a single notice is logged if this takes long.
+ *
+ *	Returns "dev_info_t" of the "child" node (existing or just created)
+ *	NULL when "devname" is not an IPoIB boot string, cannot be parsed,
+ *	no "ipib" service is configured, the port cannot be probed, or the
+ *	pkey is not in the port's pkey table.
+ */
+static dev_info_t *
+ibnex_config_obp_args(dev_info_t *parent, char *devname)
+{
+	int			ii, index;
+	int			rval, iter = 0;
+	char			*temp;
+	uint8_t			port_num;
+	ib_guid_t		hca_guid, port_guid;
+	ib_pkey_t		pkey;
+	dev_info_t		*cdip = NULL;	/* returned if nothing matches */
+	boolean_t		displayed = B_FALSE;
+	ibdm_port_attr_t	*port_attr;
+
+	IBTF_DPRINTF_L4("ibnex", "\tconfig_obp_args: %s", devname);
+
+	/*
+	 * Is this OBP node for IPoIB ?
+	 * ",protocol=ip" only counts when it is a complete option, i.e.
+	 * it is followed by ',' or by the end of the string.
+	 */
+	temp = devname;
+	do {
+		temp = strstr(temp, ",protocol=ip");
+		if (temp == NULL)
+			break;
+
+		if (strlen(devname) > (int)((temp - devname) + 12)) {
+			if (temp[12] == ',')
+				break;
+		} else {
+			break;
+		}
+		temp++;
+	} while (temp);
+
+	if (temp == NULL)
+		return (NULL);
+	if (ibnex_prom_devname_to_pkey_n_portnum(
+	    devname, &pkey, &port_num) != IBNEX_SUCCESS) {
+		return (NULL);
+	}
+	for (index = 0; index < ibnex.ibnex_nvppa_comm_svcs; index++) {
+		if (strcmp(ibnex.ibnex_vppa_comm_svc_names[index],
+		    "ipib") == 0) {
+			break;
+		}
+	}
+	/*
+	 * Without an "ipib" service, index is one past the end of the
+	 * service name table and must not be used as a service index.
+	 */
+	if (index == ibnex.ibnex_nvppa_comm_svcs) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\tconfig_obp_args: no \"ipib\" VPPA service");
+		return (NULL);
+	}
+
+	hca_guid = ibtl_ibnex_hcadip2guid(parent);
+	if ((port_attr = ibdm_ibnex_probe_hcaport(
+	    hca_guid, port_num)) == NULL) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\tconfig_port_node: Port does not exist");
+		return (NULL);
+	}
+
+	/* Wait until "port is up" */
+	while (port_attr->pa_state != IBT_PORT_ACTIVE) {
+		/* The attributes are a snapshot; drop and re-probe them */
+		ibdm_ibnex_free_port_attr(port_attr);
+		delay(drv_usectohz(10000));
+		if ((port_attr = ibdm_ibnex_probe_hcaport(
+		    hca_guid, port_num)) == NULL) {
+			return (NULL);
+		}
+		if (iter++ == 400) {
+			if (displayed == B_FALSE) {
+				cmn_err(CE_NOTE, "\tWaiting for Port %d "
+				    "initialization", port_attr->pa_port_num);
+				displayed = B_TRUE;
+			}
+		}
+	}
+	IBTF_DPRINTF_L4("ibnex", "\tPort is initialized");
+
+	mutex_enter(&ibnex.ibnex_mutex);
+	port_guid = port_attr->pa_port_guid;
+	rval = ibnex_get_dip_from_guid(port_guid, index, pkey, &cdip);
+	if (rval == IBNEX_SUCCESS && cdip != NULL) {
+		IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
+		mutex_exit(&ibnex.ibnex_mutex);
+		ibdm_ibnex_free_port_attr(port_attr);
+		return (cdip);
+	}
+	/* Create the node only if the pkey is present on this port */
+	for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
+		if (pkey == port_attr->pa_pkey_tbl[ii].pt_pkey) {
+			cdip = ibnex_commsvc_initnode(parent, port_attr,
+			    index, IBNEX_VPPA_COMMSVC_NODE, pkey, &rval,
+			    IBNEX_CFGADM_ENUMERATE);
+			IBTF_DPRINTF_L5("ibnex",
+			    "\t ibnex_commsvc_initnode rval %x", rval);
+			break;
+		}
+	}
+	mutex_exit(&ibnex.ibnex_mutex);
+
+	ibdm_ibnex_free_port_attr(port_attr);
+	return (cdip);
+}
+
+
+/*
+ * ibnex_prom_devname_to_pkey_n_portnum()
+ *	Parses the device node name and extracts "PKEY" and "port#"
+ *	from its "port=<n>," and "pkey=<hex>," options. Both options
+ *	must be present and each value must be terminated by a ','.
+ *	"*port" and "*pkey" are only meaningful on IBNEX_SUCCESS.
+ *	Returns IBNEX_SUCCESS/IBNEX_FAILURE
+ */
+static int
+ibnex_prom_devname_to_pkey_n_portnum(
+    char *devname, ib_pkey_t *pkey, uint8_t *port)
+{
+	int	ret = IBNEX_SUCCESS;
+	char	*tmp, *tmp1;
+
+	/*
+	 * A missing ',' used to fall through with IBNEX_SUCCESS and the
+	 * output left unwritten; treat it as a parse failure instead.
+	 * The value starts 5 characters in, right after "port=".
+	 */
+	if ((tmp = strstr(devname, "port=")) == NULL ||
+	    (tmp1 = strchr(tmp, ',')) == NULL)
+		return (IBNEX_FAILURE);
+	*port = ibnex_str2int(tmp + 5, (tmp1 - (tmp + 5)), &ret);
+	if (ret != IBNEX_SUCCESS)
+		return (IBNEX_FAILURE);
+
+	/* Same layout for "pkey=", but the value is hexadecimal */
+	if ((tmp = strstr(devname, "pkey=")) == NULL ||
+	    (tmp1 = strchr(tmp, ',')) == NULL)
+		return (IBNEX_FAILURE);
+	*pkey = ibnex_str2hex(tmp + 5, (tmp1 - (tmp + 5)), &ret);
+
+	return (ret);
+}
+
+/*
+ * ibnex_get_cdip_info()
+ *	Classifies "devname" by the first three characters of its node
+ *	name (IOC, HCA child or pseudo node) and looks the name up among
+ *	the children of "parent".
+ *	"*type" always receives the node type, "*cdip" the child dip or
+ *	NULL. Returns the parent data of the child, NULL if there is no
+ *	such child.
+ */
+static ibnex_node_data_t *
+ibnex_get_cdip_info(dev_info_t *parent,
+    char *devname, dev_info_t **cdip, ibnex_node_type_t *type)
+{
+	char			*namebuf, *cname = NULL, *caddr = NULL;
+	int			buflen = strlen((char *)devname) + 1;
+	ibnex_node_data_t	*pdata = NULL;
+
+	/* i_ddi_parse_name() is handed a private copy of the name */
+	namebuf = i_ddi_strdup(devname, KM_SLEEP);
+	i_ddi_parse_name(namebuf, &cname, &caddr, NULL);
+
+	IBTF_DPRINTF_L4("ibnex",
+	    "\tfind_child_dip: cname %s addr %s", cname, caddr);
+
+	if (strncmp(cname, IBNEX_IOC_CNAME, 3) == 0) {
+		*type = IBNEX_IOC_NODE;
+	} else if (strncmp(cname, IBNEX_IBPORT_CNAME, 3) == 0) {
+		*type = IBNEX_HCA_CHILD_NODE;
+	} else {
+		*type = IBNEX_PSEUDO_NODE;
+	}
+
+	*cdip = ndi_devi_findchild(parent, devname);
+
+	IBTF_DPRINTF_L4("ibnex",
+	    "\tfind_child_dip: cdip %p type %x", *cdip, *type);
+
+	if (*cdip != NULL)
+		pdata = ddi_get_parent_data(*cdip);
+
+	kmem_free(namebuf, buflen);
+	return (pdata);
+}
+
+/*
+ * ibnex_hca_bus_config_one()
+ *	BUS_CONFIG_ONE worker for ibnex_hca_bus_config(). Makes sure the
+ *	child named "devname" exists under "parent":
+ *	 - an already present child is left alone (a port node that lost
+ *	   its dip is re-linked to it);
+ *	 - IOC nodes are handed to ibnex_ioc_bus_config_one();
+ *	 - pseudo nodes are configured from their unit address;
+ *	 - anything else is treated as a port/VPPA/HCA_SVC node. "ipib"
+ *	   VPPA (IBD) nodes are deliberately not created here.
+ *	For HCA child nodes NDI_MDI_FALLBACK is added to "*flag" so that
+ *	the framework enumerates them under the pHCI.
+ *
+ *	Returns IBNEX_SUCCESS/IBNEX_FAILURE, or the return value of
+ *	ibnex_ioc_bus_config_one() when that routine reports that no
+ *	further bus config is needed.
+ */
+static int
+ibnex_hca_bus_config_one(dev_info_t *parent, void *devname,
+    ddi_bus_config_op_t op, uint_t *flag, dev_info_t **child)
+{
+	int			ret = IBNEX_SUCCESS, len, circ, need_bus_config;
+	char			*device_name, *caddr, *cname;
+	dev_info_t		*cdip;
+	ibnex_node_data_t	*node_data;
+	ibnex_node_type_t	node_type;
+	int			index;
+	uint8_t			port_num;
+	ib_pkey_t		pkey;
+
+	/* device_name is a scratch copy; it is freed on every exit path */
+	len = strlen((char *)devname) + 1;
+	device_name = i_ddi_strdup(devname, KM_SLEEP);
+	i_ddi_parse_name(device_name, &cname, &caddr, NULL);
+
+	if (caddr == NULL || (strlen(caddr) == 0)) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\thca_bus_config: Invalid device node address");
+		kmem_free(device_name, len);
+		return (IBNEX_FAILURE);
+	}
+
+	ndi_devi_enter(parent, &circ);
+	node_data = ibnex_get_cdip_info(
+	    parent, devname, &cdip, &node_type);
+	ndi_devi_exit(parent, circ);
+
+	if (cdip) {
+		if ((node_data) && (node_data->node_type ==
+		    IBNEX_PORT_COMMSVC_NODE)) {
+			if (node_data->node_dip == NULL) {
+				node_data->node_dip = cdip;
+				node_data->node_data.port_node.port_pdip =
+				    parent;
+			}
+		}
+	}
+
+	/*
+	 * If child dip is present, just return
+	 * from here.
+	 */
+	if (cdip != NULL || (node_data != NULL &&
+	    node_data->node_dip != NULL)) {
+		goto end;
+	}
+
+	switch (node_type) {
+
+	case IBNEX_IOC_NODE:
+		ret = ibnex_ioc_bus_config_one(&parent, *flag,
+		    op, devname, child, &need_bus_config);
+		if (!need_bus_config) {
+			kmem_free(device_name, len);
+			return (ret);
+		}
+		break;
+
+	case IBNEX_PSEUDO_NODE:
+		mdi_devi_enter(parent, &circ);
+		ibnex_pseudo_initnodes();
+		mutex_enter(&ibnex.ibnex_mutex);
+		ret = ibnex_pseudo_config_one(NULL,
+		    caddr, parent);
+		mutex_exit(&ibnex.ibnex_mutex);
+		mdi_devi_exit(parent, circ);
+		break;
+
+	default:
+		if (ibnex_get_pkey_commsvc_index_portnum(devname,
+		    &index, &pkey, &port_num) != IBNEX_SUCCESS) {
+			IBTF_DPRINTF_L2("ibnex",
+			    "\tconfig_port_node: Invalid Service Name");
+			/* this early exit used to leak the scratch copy */
+			kmem_free(device_name, len);
+			return (IBNEX_FAILURE);
+		}
+
+		/* Non-zero pkey and port number: index is a VPPA service */
+		if ((pkey != 0) && (port_num != 0)) {
+			if (strcmp("ipib",
+			    ibnex.ibnex_vppa_comm_svc_names[index]) == 0) {
+				IBTF_DPRINTF_L2("ibnex",
+				    "Skipping IBD devices... ");
+				break;
+			}
+		}
+
+		ndi_devi_enter(parent, &circ);
+		cdip = ibnex_config_port_node(parent, devname);
+		if (cdip)
+			ret = IBNEX_SUCCESS;
+		else
+			ret = IBNEX_FAILURE;
+		ndi_devi_exit(parent, circ);
+		break;
+	}
+end:
+	if (node_type == IBNEX_HCA_CHILD_NODE) {
+		/* Allows enumeration under PHCI */
+		*flag |= NDI_MDI_FALLBACK;
+	}
+	kmem_free(device_name, len);
+	return (ret);
+}
+
+/*
+ * ibnex_handle_hca_attach()
+ *	"cb_arg" points to the GUID of an HCA. After giving the HCA's
+ *	ports time to settle, creates the HCA_SVC nodes and, for every
+ *	port, the VPPA nodes under the HCA's dip. Does nothing if IBDM
+ *	has no information about the HCA.
+ */
+void
+ibnex_handle_hca_attach(void *cb_arg)
+{
+	ib_guid_t		hca_guid = *((ib_guid_t *)cb_arg);
+	dev_info_t		*phci;
+	ibdm_hca_list_t		*hca_list;
+	int			port, circ;
+
+	IBTF_DPRINTF_L4("ibnex", "handle_hca_attach(%llx)", hca_guid);
+
+	/*
+	 * NOTE(review): phci is used without a NULL check -- confirm
+	 * that ibtl_ibnex_hcaguid2dip() cannot fail for this GUID.
+	 */
+	phci = ibtl_ibnex_hcaguid2dip(hca_guid);
+
+	/*
+	 * Enumerate children of this HCA, port nodes,
+	 * VPPA & HCA_SVC nodes. Use ndi_devi_enter() for
+	 * locking. IB Nexus is enumerating the children
+	 * of HCA, not MPXIO clients.
+	 */
+	ndi_devi_enter(phci, &circ);
+	ibdm_ibnex_port_settle_wait(hca_guid, ibnex_port_settling_time);
+	hca_list = ibdm_ibnex_get_hca_info_by_guid(hca_guid);
+	if (hca_list != NULL) {
+		ibnex_create_hcasvc_nodes(phci, hca_list->hl_hca_port_attr);
+		for (port = 0; port < hca_list->hl_nports; port++) {
+			ibnex_create_vppa_nodes(
+			    phci, &hca_list->hl_port_attr[port]);
+		}
+		ibdm_ibnex_free_hca_list(hca_list);
+	}
+	ndi_devi_exit(phci, circ);
+}
--- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -58,12 +57,14 @@
static int ibnex_fill_ioc_tmp(nvlist_t **, ibdm_ioc_info_t *);
static int ibnex_fill_nodeinfo(nvlist_t **, ibnex_node_data_t *,
void *);
-static void ibnex_figure_ap_devstate(dev_info_t *,
+static void ibnex_figure_ap_devstate(ibnex_node_data_t *,
devctl_ap_state_t *);
static void ibnex_figure_ib_apid_devstate(devctl_ap_state_t *);
static char *ibnex_get_apid(struct devctl_iocdata *);
static int ibnex_get_dip_from_apid(char *, dev_info_t **,
ibnex_node_data_t **);
+extern int ibnex_get_node_and_dip_from_guid(ib_guid_t, int,
+ ib_pkey_t, ibnex_node_data_t **, dev_info_t **);
static ibnex_rval_t ibnex_handle_pseudo_configure(char *);
static ibnex_rval_t ibnex_handle_ioc_configure(char *);
static ibnex_rval_t ibnex_handle_commsvcnode_configure(char *);
@@ -212,7 +213,8 @@
ibnex_rval_t ret_val;
ib_service_type_t svc_type = IB_NONE;
devctl_ap_state_t ap_state;
- ibnex_node_data_t *nodep, *scanp;
+ ibnex_node_data_t *nodep = NULL;
+ ibnex_node_data_t *scanp;
struct devctl_iocdata *dcp = NULL;
IBTF_DPRINTF_L4("ibnex", "\tdevctl: cmd=%x, arg=%p, mode=%x, cred=%p, "
@@ -257,7 +259,7 @@
/* rv could be something undesirable, so reset it */
rv = 0;
- ibnex_figure_ap_devstate(apid_dip, &ap_state);
+ ibnex_figure_ap_devstate(nodep, &ap_state);
}
/* copy the return-AP-state information to the user space */
@@ -1390,9 +1392,9 @@
"node_type = %x", hca_guid, port_guid, svc_index, p_key, node_type);
/* check if this node was seen before? */
- rval = ibnex_get_dip_from_guid(port_guid, svc_index, p_key, &dip);
- if (rval == IBNEX_SUCCESS && dip) {
- nodep = ddi_get_parent_data(dip);
+ rval = ibnex_get_node_and_dip_from_guid(port_guid, svc_index, p_key,
+ &nodep, &dip);
+ if (rval == IBNEX_SUCCESS && nodep != NULL) {
if (ibnex_fill_nodeinfo(nvlpp, nodep, NULL) != 0) {
IBTF_DPRINTF_L2("ibnex",
@@ -1646,7 +1648,7 @@
IBNEX_NODE_TYPE_NVL, node_datap->node_type);
/* figure out "ostate", "rstate" and "condition" */
- ibnex_figure_ap_devstate(node_datap->node_dip, &state);
+ ibnex_figure_ap_devstate(node_datap, &state);
if (nvlist_add_int32(*nvlpp, IBNEX_NODE_RSTATE_NVL, state.ap_rstate)) {
IBTF_DPRINTF_L2("ibnex", "ibnex_fill_nodeinfo: "
@@ -1684,16 +1686,19 @@
* "last_change" value.
*/
static void
-ibnex_figure_ap_devstate(dev_info_t *dip, devctl_ap_state_t *ap_state)
+ibnex_figure_ap_devstate(ibnex_node_data_t *nodep, devctl_ap_state_t *ap_state)
{
- IBTF_DPRINTF_L5("ibnex", "ibnex_figure_ap_devstate: dip = %p", dip);
+ IBTF_DPRINTF_L5("ibnex", "ibnex_figure_ap_devstate: nodep = %p", nodep);
ap_state->ap_rstate = AP_RSTATE_CONNECTED;
- if (dip == NULL) { /* for nodes not seen by IBNEX yet */
+ if (nodep == NULL) { /* for nodes not seen by IBNEX yet */
ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
ap_state->ap_condition = AP_COND_UNKNOWN;
} else {
- if (i_ddi_node_state(dip) < DS_BOUND) {
+ /*
+ * IBNEX_NODE_AP_UNCONFIGURED & IBNEX_NODE_AP_CONFIGURING.
+ */
+ if (nodep->node_ap_state >= IBNEX_NODE_AP_UNCONFIGURED) {
ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
ap_state->ap_condition = AP_COND_UNKNOWN;
} else {
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -110,6 +109,8 @@
static void ibtl_kstat_stats_create(ibtl_hca_devinfo_t *, uint_t);
static void ibtl_kstat_pkeys_create(ibtl_hca_devinfo_t *, uint_t);
+extern kmutex_t ibtl_part_attr_mutex;
+
/*
* IBTF Loadable Module Routines.
*/
@@ -147,6 +148,8 @@
mutex_init(&ibtl_qp_mutex, NULL, MUTEX_DEFAULT, NULL);
cv_init(&ibtl_qp_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&ibtl_part_attr_mutex, NULL, MUTEX_DEFAULT, NULL);
+
ibtl_thread_init();
return (rval);
@@ -173,6 +176,7 @@
cv_destroy(&ibtl_close_hca_cv);
mutex_destroy(&ibtl_qp_mutex);
cv_destroy(&ibtl_qp_cv);
+ mutex_destroy(&ibtl_part_attr_mutex);
/*
* Stop Logging
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_misc.c Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/ib/ibtl/ibti_common.h>
+
+kmutex_t ibtl_part_attr_mutex; /* held while the two callbacks below are set, cleared or called */
+ibt_status_t (*ibtl_get_part_attr_cb)(datalink_id_t, ibt_part_attr_t *); /* NULL while nothing is registered */
+ibt_status_t (*ibtl_get_all_part_attr_cb)(ibt_part_attr_t **, int *); /* NULL while nothing is registered */
+
+/*
+ * ibt_register_part_attr_cb()
+ *	Installs the two partition attribute callbacks used by
+ *	ibt_get_part_attr() and ibt_get_all_part_attr(). Whatever was
+ *	registered before is replaced.
+ */
+void
+ibt_register_part_attr_cb(
+    ibt_status_t (*get_part_attr)(datalink_id_t, ibt_part_attr_t *),
+    ibt_status_t (*get_all_part_attr)(ibt_part_attr_t **, int *))
+{
+	/* Both pointers change under one lock hold */
+	mutex_enter(&ibtl_part_attr_mutex);
+	ibtl_get_all_part_attr_cb = get_all_part_attr;
+	ibtl_get_part_attr_cb = get_part_attr;
+	mutex_exit(&ibtl_part_attr_mutex);
+}
+
+/*
+ * ibt_unregister_part_attr_cb()
+ *	Removes both partition attribute callbacks.
+ */
+void
+ibt_unregister_part_attr_cb(void)
+{
+	/* Registering "no callbacks" clears both pointers under the lock */
+	ibt_register_part_attr_cb(NULL, NULL);
+}
+
+/*
+ * ibt_get_part_attr()
+ *	Passes the request for the attributes of the partition "linkid"
+ *	on to the registered callback and returns its status.
+ *	Returns IBT_NO_SUCH_OBJECT when no callback is registered.
+ */
+ibt_status_t
+ibt_get_part_attr(datalink_id_t linkid, ibt_part_attr_t *attr)
+{
+	ibt_status_t	rv = IBT_NO_SUCH_OBJECT;
+
+	/* The callback runs with the lock held */
+	mutex_enter(&ibtl_part_attr_mutex);
+	if (ibtl_get_part_attr_cb != NULL)
+		rv = (*ibtl_get_part_attr_cb)(linkid, attr);
+	mutex_exit(&ibtl_part_attr_mutex);
+
+	return (rv);
+}
+
+/*
+ * ibt_get_all_part_attr()
+ *	Passes the request for the attributes of all partitions on to the
+ *	registered callback and returns its status.
+ *	With no callback registered this is not an error: an empty list
+ *	(*attr == NULL, *nparts == 0) is returned with IBT_SUCCESS.
+ */
+ibt_status_t
+ibt_get_all_part_attr(ibt_part_attr_t **attr, int *nparts)
+{
+	ibt_status_t	rv = IBT_SUCCESS;
+
+	mutex_enter(&ibtl_part_attr_mutex);
+	if (ibtl_get_all_part_attr_cb == NULL) {
+		*attr = NULL;
+		*nparts = 0;
+	} else {
+		rv = (*ibtl_get_all_part_attr_cb)(attr, nparts);
+	}
+	mutex_exit(&ibtl_part_attr_mutex);
+
+	return (rv);
+}
+
+/*
+ * ibt_free_part_attr()
+ *	Frees an array of "nparts" partition attributes. It is sized as
+ *	nparts * sizeof (ibt_part_attr_t), so "nparts" must be the count
+ *	the array was allocated with.
+ *	A NULL "attr" or a non-positive "nparts" is a no-op, which covers
+ *	the empty list handed out when no callback is registered.
+ *	Always returns IBT_SUCCESS.
+ */
+ibt_status_t
+ibt_free_part_attr(ibt_part_attr_t *attr, int nparts)
+{
+	/* never hand a NULL buffer with a non-zero size to kmem_free() */
+	if (attr != NULL && nparts > 0)
+		kmem_free(attr, sizeof (ibt_part_attr_t) * nparts);
+	return (IBT_SUCCESS);
+}
--- a/usr/src/uts/common/io/ib/inc.flg Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/inc.flg Wed Apr 14 10:26:18 2010 -0700
@@ -21,8 +21,7 @@
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
#
#
@@ -62,14 +61,14 @@
usr/src/uts/sparc/ibdm \
usr/src/uts/sparc/ibmf \
usr/src/uts/sparc/ibtl \
- usr/src/uts/sparc/ibd \
+ usr/src/uts/sparc/ibp \
usr/src/uts/sparc/rpcib \
usr/src/uts/intel/ib \
usr/src/uts/intel/ibcm \
usr/src/uts/intel/ibdm \
usr/src/uts/intel/ibmf \
usr/src/uts/intel/ibtl \
- usr/src/uts/intel/ibd \
+ usr/src/uts/intel/ibp \
usr/src/uts/intel/rpcib
# packaging files
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
@@ -38,6 +37,8 @@
#include <sys/kstr.h>
#include <sys/t_kuser.h>
+#include <sys/dls.h>
+
extern char cmlog[];
extern int ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s,
@@ -202,64 +203,55 @@
return (IBT_SUCCESS);
}
-
-static int
-ibcm_arp_get_ibd_insts_cb(dev_info_t *dip, void *arg)
+void
+ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds)
{
- ibcm_arp_ibd_insts_t *ibds = (ibcm_arp_ibd_insts_t *)arg;
- ibcm_arp_ip_t *ipp;
- ib_pkey_t pkey;
- uint8_t port;
- ib_guid_t hca_guid;
- ib_gid_t port_gid;
-
- if (i_ddi_devi_attached(dip) &&
- (strcmp(ddi_node_name(dip), "ibport") == 0) &&
- (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) {
-
- if (ibds->ibcm_arp_ibd_cnt >= ibds->ibcm_arp_ibd_alloc) {
- ibcm_arp_ip_t *tmp = NULL;
- uint8_t new_count;
-
- new_count = ibds->ibcm_arp_ibd_alloc +
- IBCM_ARP_IBD_INSTANCES;
-
- tmp = (ibcm_arp_ip_t *)kmem_zalloc(
- new_count * sizeof (ibcm_arp_ip_t), KM_SLEEP);
- bcopy(ibds->ibcm_arp_ip, tmp,
- ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
- kmem_free(ibds->ibcm_arp_ip,
- ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
- ibds->ibcm_arp_ibd_alloc = new_count;
- ibds->ibcm_arp_ip = tmp;
- }
-
- if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
- "hca-guid", 0)) == 0) ||
- ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
- "port-number", 0)) == 0) ||
- (ibt_get_port_state_byguid(hca_guid, port, &port_gid,
- NULL) != IBT_SUCCESS) ||
- ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
- "port-pkey", IB_PKEY_INVALID_LIMITED)) <=
- IB_PKEY_INVALID_FULL)) {
- return (DDI_WALK_CONTINUE);
- }
-
- ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
- ipp->ip_inst = ddi_get_instance(dip);
- ipp->ip_pkey = pkey;
- ipp->ip_hca_guid = hca_guid;
- ipp->ip_port_gid = port_gid;
- ibds->ibcm_arp_ibd_cnt++;
+ if (ibds->ibcm_arp_ip) {
+ kmem_free(ibds->ibcm_arp_ip, ibds->ibcm_arp_ibd_alloc *
+ sizeof (ibcm_arp_ip_t));
+ ibds->ibcm_arp_ibd_alloc = 0;
+ ibds->ibcm_arp_ibd_cnt = 0;
+ ibds->ibcm_arp_ip = NULL;
}
- return (DDI_WALK_CONTINUE);
}
static void
ibcm_arp_get_ibd_insts(ibcm_arp_ibd_insts_t *ibds)
{
- ddi_walk_devs(ddi_root_node(), ibcm_arp_get_ibd_insts_cb, ibds);
+ ibcm_arp_ip_t *ipp;
+ ib_gid_t port_gid;
+ ibt_part_attr_t *attr_list, *attr;
+ int nparts;
+
+ if ((ibt_get_all_part_attr(&attr_list, &nparts) != IBT_SUCCESS) ||
+ (nparts == 0)) { /* lookup failed or no partitions: hand back an empty ibds */
+ ibds->ibcm_arp_ibd_alloc = 0;
+ ibds->ibcm_arp_ibd_cnt = 0;
+ ibds->ibcm_arp_ip = NULL;
+ return;
+ }
+
+ ibds->ibcm_arp_ibd_alloc = nparts; /* one slot per partition, even for those skipped below */
+ ibds->ibcm_arp_ibd_cnt = 0;
+ ibds->ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
+ nparts * sizeof (ibcm_arp_ip_t), KM_SLEEP);
+
+ attr = attr_list;
+ while (nparts--) { /* nparts is consumed here; the count survives in ibcm_arp_ibd_alloc */
+ if (ibt_get_port_state_byguid(attr->pa_hca_guid,
+ attr->pa_port, &port_gid, NULL) == IBT_SUCCESS) { /* partitions whose port state cannot be read are left out */
+
+ ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
+ ipp->ip_linkid = attr->pa_plinkid;
+ ipp->ip_pkey = attr->pa_pkey;
+ ipp->ip_hca_guid = attr->pa_hca_guid;
+ ipp->ip_port_gid = port_gid;
+ ibds->ibcm_arp_ibd_cnt++;
+ }
+ attr++;
+ }
+
+ (void) ibt_free_part_attr(attr_list, ibds->ibcm_arp_ibd_alloc); /* NOTE(review): relies on ibcm_arp_ibd_alloc holding nparts without truncation -- confirm the field's type */
}
/*
@@ -331,6 +323,37 @@
return (0);
}
+/*
+ * Returns the entry of `ibds' that belongs to the datalink named
+ * `linkname', or NULL if the name cannot be mapped to a link id or no
+ * entry carries that id.
+ */
+static ibcm_arp_ip_t *
+ibcm_arp_lookup(ibcm_arp_ibd_insts_t *ibds, char *linkname)
+{
+	datalink_id_t	linkid;
+	int		idx;
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: linkname = %s\n", linkname);
+
+	/*
+	 * Two attempts at the name to id mapping, the second only if the
+	 * first one fails. The first call requires the datalink
+	 * management daemon (the authoritative source of information
+	 * about name to id mapping) to be present and answering upcalls,
+	 * the second does not.
+	 */
+	if (dls_mgmt_get_linkid(linkname, &linkid) != 0 &&
+	    dls_devnet_macname2linkid(linkname, &linkid) != 0) {
+		IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: could not "
+		    "get linkid from linkname\n");
+		return (NULL);
+	}
+
+	for (idx = 0; idx < ibds->ibcm_arp_ibd_cnt; idx++) {
+		ibcm_arp_ip_t	*entry = &ibds->ibcm_arp_ip[idx];
+
+		if (entry->ip_linkid == linkid)
+			return (entry);
+	}
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: returning NULL\n");
+	return (NULL);
+}
+
/*
* Fill in `ibds' with IP addresses tied to IFT_IB IP interfaces. Returns
* B_TRUE if at least one address was filled in.
@@ -352,12 +375,13 @@
IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_ipaddr: Family %d, nifs %d",
family_loc, nifs);
- for (lifrp = lifc.lifc_req, i = 0;
- i < nifs && naddr < ibds->ibcm_arp_ibd_cnt; i++, lifrp++) {
+ for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
if (lifrp->lifr_type != IFT_IB)
continue;
- ipp = &ibds->ibcm_arp_ip[naddr];
+ if ((ipp = ibcm_arp_lookup(ibds, lifrp->lifr_name)) == NULL)
+ continue;
+
switch (lifrp->lifr_addr.ss_family) {
case AF_INET:
ipp->ip_inet_family = AF_INET;
@@ -399,6 +423,7 @@
if (!ibcm_arp_get_ibd_ipaddr(ibdp, family_loc)) {
IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: failed to get "
"ibd instance: IBT_SRC_IP_NOT_FOUND");
+ ibcm_arp_free_ibds(ibdp);
return (IBT_SRC_IP_NOT_FOUND);
}
@@ -407,9 +432,9 @@
char my_buf[INET6_ADDRSTRLEN];
ibcm_arp_ip_t *aip = &ibdp->ibcm_arp_ip[i];
- IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: ibd[%d]: Family %d "
- "Instance %d PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
- i, aip->ip_inet_family, aip->ip_inst, aip->ip_pkey,
+ IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: Linkid %d Family %d "
+ "PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
+ aip->ip_linkid, aip->ip_inet_family, aip->ip_pkey,
aip->ip_hca_guid, aip->ip_port_gid.gid_prefix,
aip->ip_port_gid.gid_guid);
if (aip->ip_inet_family == AF_INET) {
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c Wed Apr 14 10:26:18 2010 -0700
@@ -6357,12 +6357,6 @@
return (IBT_INVALID_PARAM);
}
- bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t));
- ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES;
- ibds.ibcm_arp_ibd_cnt = 0;
- ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
- ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP);
-
retval = ibcm_arp_get_ibds(&ibds, AF_UNSPEC);
if (retval != IBT_SUCCESS) {
IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
@@ -6403,10 +6397,7 @@
}
get_src_ip_end:
- if (ibds.ibcm_arp_ip)
- kmem_free(ibds.ibcm_arp_ip,
- ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
-
+ ibcm_arp_free_ibds(&ibds);
return (retval);
}
--- a/usr/src/uts/common/io/warlock/ib.wlcmd Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/warlock/ib.wlcmd Wed Apr 14 10:26:18 2010 -0700
@@ -18,10 +18,8 @@
#
# CDDL HEADER END
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
# ibnexus Callback handlers for IBDM
add ibdm.ibdm_ibnex_callback targets ibnex_dm_callback
@@ -46,6 +44,7 @@
# ibnex reprobe function, called from taskq
root ibnex_handle_reprobe_dev
+root ibnex_handle_hca_attach
# ibnex other functions
root ibnex_name_child
--- a/usr/src/uts/common/os/swapgeneric.c Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/os/swapgeneric.c Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/* ONC_PLUS EXTRACT START */
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1982, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* ONC_PLUS EXTRACT END */
@@ -1000,8 +999,8 @@
* InfiniBand.
*/
if (netboot_over_ib(bootpath) &&
- modloadonly("drv", "ibd") == -1) {
- cmn_err(CE_CONT, "ibd: cannot load platform driver\n");
+ modloadonly("drv", "ibp") == -1) {
+ cmn_err(CE_CONT, "ibp: cannot load platform driver\n");
kmem_free(pathcopy, pathcopy_len);
return (NULL);
}
--- a/usr/src/uts/common/rpc/ib.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/rpc/ib.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2007, The Ohio State University. All rights reserved.
@@ -140,8 +139,6 @@
* ATS relsted defines and structures.
*/
#define ATS_AR_DATA_LEN 16
-#define IBD_NAME "ibd"
-#define N_IBD_INSTANCES 4
/*
--- a/usr/src/uts/common/sys/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -261,6 +261,7 @@
hwconf.h \
ia.h \
iapriocntl.h \
+ ibpart.h \
id32.h \
idmap.h \
ieeefp.h \
--- a/usr/src/uts/common/sys/dld_ioc.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/dld_ioc.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DLD_IOC_H
@@ -59,6 +58,7 @@
#define SIMNET_IOC 0x5132
#define IPTUN_IOC 0x454A
#define BRIDGE_IOC 0xB81D
+#define IBPART_IOC 0x6171
/* GLDv3 modules use these macros to generate unique ioctl commands */
#define DLDIOC(cmdid) DLD_IOC_CMD(DLD_IOC, (cmdid))
@@ -67,6 +67,7 @@
#define SIMNETIOC(cmdid) DLD_IOC_CMD(SIMNET_IOC, (cmdid))
#define IPTUNIOC(cmdid) DLD_IOC_CMD(IPTUN_IOC, (cmdid))
#define BRIDGEIOC(cmdid) DLD_IOC_CMD(BRIDGE_IOC, (cmdid))
+#define IBPARTIOC(cmdid) DLD_IOC_CMD(IBPART_IOC, (cmdid))
#ifdef _KERNEL
--- a/usr/src/uts/common/sys/dls_mgmt.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/dls_mgmt.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _DLS_MGMT_H
@@ -46,13 +45,14 @@
DATALINK_CLASS_ETHERSTUB = 0x10,
DATALINK_CLASS_SIMNET = 0x20,
DATALINK_CLASS_BRIDGE = 0x40,
- DATALINK_CLASS_IPTUN = 0x60
+ DATALINK_CLASS_IPTUN = 0x60,
+ DATALINK_CLASS_PART = 0x100
} datalink_class_t;
#define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \
DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \
DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \
- DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN)
+ DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART)
/*
* A combination of flags and media.
--- a/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_CLIENTS_IBD_H
@@ -82,6 +81,61 @@
#define IBD_SEND 0
#define IBD_RECV 1
+/* Tunable defaults */
+#define IBD_LINK_MODE_UD 0
+#define IBD_LINK_MODE_RC 1
+
+#define IBD_DEF_LINK_MODE IBD_LINK_MODE_RC
+#define IBD_DEF_LSO_POLICY B_TRUE
+#define IBD_DEF_NUM_LSO_BUFS 1024
+#define IBD_DEF_CREATE_BCAST_GROUP B_TRUE
+#define IBD_DEF_COALESCE_COMPLETIONS B_TRUE
+#define IBD_DEF_UD_RX_COMP_COUNT 4
+#define IBD_DEF_UD_RX_COMP_USEC 10
+#define IBD_DEF_UD_TX_COMP_COUNT 16
+#define IBD_DEF_UD_TX_COMP_USEC 300
+#define IBD_DEF_RC_RX_COMP_COUNT 4
+#define IBD_DEF_RC_RX_COMP_USEC 10
+#define IBD_DEF_RC_TX_COMP_COUNT 10
+#define IBD_DEF_RC_TX_COMP_USEC 300
+#define IBD_DEF_UD_TX_COPY_THRESH 4096
+#define IBD_DEF_RC_RX_COPY_THRESH 4096
+#define IBD_DEF_RC_TX_COPY_THRESH 4096
+#define IBD_DEF_UD_NUM_RWQE 4000
+#define IBD_DEF_UD_NUM_SWQE 4000
+#define IBD_DEF_RC_ENABLE_SRQ B_TRUE
+#define IBD_DEF_RC_NUM_RWQE 2047
+#define IBD_DEF_RC_NUM_SWQE 511
+#define IBD_DEF_NUM_AH 256
+#define IBD_DEF_HASH_SIZE 32
+#define IBD_DEF_RC_NUM_SRQ (IBD_DEF_RC_NUM_RWQE - 1)
+#define IBD_DEF_RC_RX_RWQE_THRESH (IBD_DEF_RC_NUM_RWQE >> 2)
+
+/* Tunable limits */
+#define IBD_MIN_NUM_LSO_BUFS 512
+#define IBD_MAX_NUM_LSO_BUFS 4096
+#define IBD_MIN_UD_TX_COPY_THRESH 2048
+#define IBD_MAX_UD_TX_COPY_THRESH 65536
+#define IBD_MIN_UD_NUM_SWQE 512
+#define IBD_MAX_UD_NUM_SWQE 8000
+#define IBD_MIN_UD_NUM_RWQE 512
+#define IBD_MAX_UD_NUM_RWQE 8000
+#define IBD_MIN_NUM_AH 32
+#define IBD_MAX_NUM_AH 8192
+#define IBD_MIN_HASH_SIZE 32
+#define IBD_MAX_HASH_SIZE 1024
+
+#define IBD_MIN_RC_NUM_SWQE 511
+#define IBD_MAX_RC_NUM_SWQE 8000
+#define IBD_MIN_RC_NUM_RWQE 511
+#define IBD_MAX_RC_NUM_RWQE 8000
+#define IBD_MIN_RC_RX_COPY_THRESH 1500
+#define IBD_MAX_RC_RX_COPY_THRESH 65520
+#define IBD_MIN_RC_TX_COPY_THRESH 1500
+#define IBD_MAX_RC_TX_COPY_THRESH 65520
+#define IBD_MIN_RC_NUM_SRQ (IBD_MIN_RC_NUM_RWQE - 1)
+#define IBD_MIN_RC_RX_RWQE_THRESH (IBD_MIN_RC_NUM_RWQE >> 2)
+
/*
* Thresholds
*
@@ -511,7 +565,7 @@
kstat_named_t rc_rwqe_short; /* short rwqe */
kstat_named_t rc_xmt_bytes;
- /* pkt size <= ibd_rc_tx_copy_thresh */
+ /* pkt size <= state->id_rc_tx_copy_thresh */
kstat_named_t rc_xmt_small_pkt;
kstat_named_t rc_xmt_fragmented_pkt;
/* fail in ibt_map_mem_iov() */
@@ -652,6 +706,9 @@
uint_t bkt_nfree;
} ibd_lsobkt_t;
+#define IBD_PORT_DRIVER 0x1
+#define IBD_PARTITION_OBJ 0x2
+
/*
* Posting to a single software rx post queue is contentious,
* so break it out to (multiple) an array of queues.
@@ -673,6 +730,7 @@
* (per network interface).
*/
typedef struct ibd_state_s {
+ uint_t id_type;
dev_info_t *id_dip;
ibt_clnt_hdl_t id_ibt_hdl;
ibt_hca_hdl_t id_hca_hdl;
@@ -720,7 +778,13 @@
ibt_mr_hdl_t id_rx_mr_hdl;
ibt_mr_desc_t id_rx_mr_desc;
uint_t id_rx_buf_sz;
- uint32_t id_num_rwqe;
+ /*
+ * id_ud_num_rwqe
+ * Number of "receive WQE" elements that will be allocated and used
+ * by ibd. This parameter is limited by the maximum channel size of
+ * the HCA. Each buffer in the receive wqe will be of MTU size.
+ */
+ uint32_t id_ud_num_rwqe;
ibd_list_t id_rx_list;
ddi_softintr_t id_rx;
uint32_t id_rx_bufs_outstanding_limit;
@@ -789,7 +853,16 @@
uint64_t id_num_intrs;
uint64_t id_tx_short;
- uint32_t id_num_swqe;
+ /*
+ * id_ud_num_swqe
+ * Number of "send WQE" elements that will be allocated and used by
+ * ibd. When tuning this parameter, the size of pre-allocated, pre-
+ * mapped copy buffer in each of these send wqes must be taken into
+ * account. This copy buffer size is determined by the value of
+ * IBD_TX_BUF_SZ (this is currently set to the same value of
+ * id_ud_tx_copy_thresh, but may be changed independently if needed).
+ */
+ uint32_t id_ud_num_swqe;
uint64_t id_xmt_bytes;
uint64_t id_rcv_bytes;
@@ -953,6 +1026,112 @@
#ifdef DEBUG
kstat_t *rc_ksp;
#endif
+ ib_guid_t id_hca_guid;
+ ib_guid_t id_port_guid;
+ datalink_id_t id_dlinkid;
+ datalink_id_t id_plinkid;
+ int id_port_inst;
+ struct ibd_state_s *id_next;
+ boolean_t id_force_create;
+ boolean_t id_bgroup_present;
+ uint_t id_hca_max_chan_sz;
+
+ /*
+ * UD Mode Tunables
+ *
+ * id_ud_tx_copy_thresh
+ * This sets the threshold at which ibd will attempt to do a bcopy
+ * of the outgoing data into a pre-mapped buffer. IPoIB driver's
+ * send behavior is restricted by various parameters, so this value
+ * must be set only after careful consideration. For
+ * instance, IB HCAs currently impose a relatively small limit
+ * (when compared to ethernet NICs) on the length of the SGL for
+ * transmit. On the other hand, the ip stack could send down mp
+ * chains that are quite long when LSO is enabled.
+ *
+ * id_num_lso_bufs
+ * Number of "larger-than-MTU" copy buffers to use for cases when the
+ * outgoing mblk chain is too fragmented to be used with
+ * ibt_map_mem_iov() and too large to be used with regular MTU-sized
+ * copy buffers. It is not recommended to tune this variable without
+ * understanding the application environment and/or memory resources.
+ * The size of each of these lso buffers is determined by the value of
+ * IBD_LSO_BUFSZ.
+ *
+ * id_num_ah
+ * Number of AH cache entries to allocate
+ *
+ * id_hash_size
+ * Hash table size for the active AH list
+ *
+ */
+ uint_t id_ud_tx_copy_thresh;
+ uint_t id_num_lso_bufs;
+ uint_t id_num_ah;
+ uint_t id_hash_size;
+
+ boolean_t id_create_broadcast_group;
+
+ boolean_t id_allow_coalesce_comp_tuning;
+ uint_t id_ud_rx_comp_count;
+ uint_t id_ud_rx_comp_usec;
+ uint_t id_ud_tx_comp_count;
+ uint_t id_ud_tx_comp_usec;
+
+ /* RC Mode Tunables */
+
+ uint_t id_rc_rx_comp_count;
+ uint_t id_rc_rx_comp_usec;
+ uint_t id_rc_tx_comp_count;
+ uint_t id_rc_tx_comp_usec;
+ /*
+ * id_rc_tx_copy_thresh
+ * This sets the threshold at which ibd will attempt to do a bcopy
+ * of the outgoing data into a pre-mapped buffer.
+ *
+ * id_rc_rx_copy_thresh
+ * If (the size of incoming buffer <= id_rc_rx_copy_thresh), ibd
+ * will attempt to allocate a buffer and do a bcopy of the incoming
+ * data into the allocated buffer.
+ *
+ * id_rc_rx_rwqe_thresh
+ * If (the number of available rwqe < id_rc_rx_rwqe_thresh), ibd
+ * will attempt to allocate a buffer and do a bcopy of the incoming
+ * data into the allocated buffer.
+ *
+ * id_rc_num_swqe
+ * 1) Send CQ size = id_rc_num_swqe
+ * 2) The send queue size = id_rc_num_swqe - 1
+ * 3) Number of pre-allocated Tx buffers for ibt_post_send() =
+ * id_rc_num_swqe - 1.
+ *
+ * id_rc_num_rwqe
+ * 1) For non-SRQ, we pre-post id_rc_num_rwqe number of WRs
+ * via ibt_post_receive() for receive queue of each RC channel.
+ * 2) For SRQ and non-SRQ, receive CQ size = id_rc_num_rwqe
+ *
+ * For SRQ
+ * If using SRQ, we allocate id_rc_num_srq number of buffers (the
+ * size of each buffer is equal to RC mtu) and post them with
+ * ibt_post_srq().
+ *
+ * id_rc_num_srq
+ * id_rc_num_srq should not be larger than id_rc_num_rwqe,
+ * otherwise it will cause a bug with the following warnings:
+ * NOTICE: hermon0: Device Error: EQE cq overrun or protection error
+ * NOTICE: hermon0: Device Error: EQE local work queue catastrophic
+ * error
+ * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
+ * catastrophic channel error
+ * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
+ * completion queue error
+ */
+ uint_t id_rc_tx_copy_thresh;
+ uint_t id_rc_rx_copy_thresh;
+ uint_t id_rc_rx_rwqe_thresh;
+ uint_t id_rc_num_swqe;
+ uint_t id_rc_num_rwqe;
+ uint_t id_rc_num_srq;
} ibd_state_t;
/*
--- a/usr/src/uts/common/sys/ib/ibnex/ibnex.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibnex/ibnex.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_IBNEX_IBNEX_H
@@ -86,13 +85,16 @@
* Any changes to these need to be reflected in that file as well.
*/
typedef enum {
- IBNEX_PORT_COMMSVC_NODE,
- IBNEX_VPPA_COMMSVC_NODE,
- IBNEX_HCASVC_COMMSVC_NODE,
- IBNEX_IOC_NODE,
- IBNEX_PSEUDO_NODE
+ IBNEX_PORT_COMMSVC_NODE = 0,
+ IBNEX_VPPA_COMMSVC_NODE = 1,
+ IBNEX_HCASVC_COMMSVC_NODE = 2,
+ IBNEX_IOC_NODE = 4,
+ IBNEX_PSEUDO_NODE = 8
} ibnex_node_type_t;
+#define IBNEX_HCA_CHILD_NODE (IBNEX_PORT_COMMSVC_NODE | \
+ IBNEX_VPPA_COMMSVC_NODE | IBNEX_HCASVC_COMMSVC_NODE)
+
/*
* Defines for Child device node state:
@@ -223,6 +225,8 @@
kcondvar_t ibnex_ioc_list_cv;
uint32_t ibnex_ioc_list_state;
ibdm_ioc_info_t *ibnex_ioc_list;
+
+ ddi_taskq_t *ibnex_taskq_id;
} ibnex_t;
/*
@@ -284,6 +288,10 @@
#define IBNEX_HW_NOT_IN_DEVTREE 0
#define IBNEX_HW_IN_DEVTREE 1
+/*
+ * Function prototype declarations
+ */
+
#ifdef __cplusplus
}
#endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_IBTL_IBTI_COMMON_H
@@ -32,6 +31,9 @@
* This file contains the shared/common transport data types and function
* prototypes.
*/
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+#include <sys/ib/ibtl/ibtl_status.h>
#include <sys/ib/ibtl/ibtl_types.h>
#include <sys/ib/ibtl/ibti_cm.h>
#include <sys/isa_defs.h>
@@ -1910,6 +1912,28 @@
ibt_status_t ibt_free_io_mem(ibt_hca_hdl_t, ibt_mem_alloc_hdl_t);
+/*
+ * Interfaces to get IB partition information.
+ */
+
+typedef struct ibt_part_attr_s {
+ datalink_id_t pa_dlinkid;
+ datalink_id_t pa_plinkid;
+ uint8_t pa_port;
+ ib_guid_t pa_hca_guid;
+ ib_guid_t pa_port_guid;
+ ib_pkey_t pa_pkey;
+} ibt_part_attr_t;
+
+void ibt_register_part_attr_cb(
+ ibt_status_t (*)(datalink_id_t, ibt_part_attr_t *),
+ ibt_status_t (*)(ibt_part_attr_t **, int *));
+void ibt_unregister_part_attr_cb(void);
+
+ibt_status_t ibt_get_part_attr(datalink_id_t, ibt_part_attr_t *);
+ibt_status_t ibt_get_all_part_attr(ibt_part_attr_t **, int *);
+ibt_status_t ibt_free_part_attr(ibt_part_attr_t *, int);
+
#ifdef __cplusplus
}
#endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_IBTL_IBTL_STATUS_H
@@ -124,6 +123,7 @@
/* records was returned. */
IBT_DEST_IP_GID_NOT_FOUND = 25, /* No IP to GID Mapping */
IBT_SRC_IP_NOT_FOUND = 26, /* SRC IP Endpoint not found */
+ IBT_NO_SUCH_OBJECT = 27, /* No such object */
/*
* Resource Errors
--- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_IB_MGT_IBCM_IBCM_ARP_H
@@ -79,10 +78,8 @@
ibcm_arp_prwqn_t *wqnp;
} ibcm_arp_streams_t;
-#define IBCM_ARP_IBD_INSTANCES 4
-
typedef struct ibcm_arp_ip_s {
- uint8_t ip_inst;
+ datalink_id_t ip_linkid;
ib_pkey_t ip_pkey;
ib_guid_t ip_hca_guid;
ib_gid_t ip_port_gid;
@@ -105,6 +102,7 @@
ibt_ip_addr_t destip, ib_gid_t *sgid, ib_gid_t *dgid,
ibt_ip_addr_t *saddr_p);
ibt_status_t ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp, sa_family_t fam);
+void ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds);
#ifdef __cplusplus
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ibpart.h Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _SYS_IBPART_H
+#define _SYS_IBPART_H
+
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+#include <sys/dld_ioc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IBD_CREATE_IBPART IBPARTIOC(1)
+#define IBD_DELETE_IBPART IBPARTIOC(2)
+#define IBD_INFO_IBPART IBPARTIOC(3)
+
+#define IBD_INFO_CMD_IBPART 1
+#define IBD_INFO_CMD_IBPORT 2
+#define IBD_INFO_CMD_PKEYTBLSZ 3
+
+typedef enum ibd_part_err_e {
+ IBD_INVALID_PORT_INST = 1,
+ IBD_PORT_IS_DOWN,
+ IBD_PKEY_NOT_PRESENT,
+ IBD_INVALID_PKEY,
+ IBD_PARTITION_EXISTS,
+ IBD_NO_HW_RESOURCE,
+ IBD_INVALID_PKEY_TBL_SIZE
+} ibd_part_err_t;
+/*
+ * NOTE: If you change this structure make sure that alignments are correct
+ * for the proper operation of the ioctl in both the 32 and 64 bit modes.
+ */
+typedef struct ibd_ioctl_s {
+ int ioc_info_cmd;
+ datalink_id_t ioc_linkid;
+ int ioc_port_inst;
+ uint_t ioc_portnum;
+ ib_guid_t ioc_hcaguid;
+ ib_guid_t ioc_portguid;
+ int ioc_status;
+ uint32_t align1;
+} ibd_ioctl_t;
+
+/*
+ * NOTE: If you change this structure make sure that alignments are correct
+ * for the proper operation of the ioctl in both the 32 and 64 bit modes.
+ */
+typedef struct ibpart_ioctl_s {
+ ibd_ioctl_t ibdioc;
+ datalink_id_t ioc_partid;
+ boolean_t ioc_force_create;
+ ib_pkey_t ioc_pkey;
+ uint16_t align1;
+ uint32_t align2;
+} ibpart_ioctl_t;
+
+typedef struct ibpart_ioctl_s ibd_create_ioctl_t;
+typedef struct ibpart_ioctl_s ibd_delete_ioctl_t;
+
+typedef struct ibport_ioctl_s {
+ ibd_ioctl_t ibdioc;
+ uint_t ioc_pkey_tbl_sz;
+ ib_pkey_t *ioc_pkeys;
+} ibport_ioctl_t;
+
+#ifdef _SYSCALL32
+typedef struct ibport_ioctl32_s {
+ ibd_ioctl_t ibdioc;
+ uint_t ioc_pkey_tbl_sz;
+ caddr32_t ioc_pkeys;
+} ibport_ioctl32_t;
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_IBPART_H */
--- a/usr/src/uts/common/sys/mac.h Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/mac.h Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_MAC_H
@@ -205,6 +204,7 @@
MAC_PROP_MAX_RX_RINGS_AVAIL,
MAC_PROP_MAX_RXHWCLNT_AVAIL,
MAC_PROP_MAX_TXHWCLNT_AVAIL,
+ MAC_PROP_IB_LINKMODE,
MAC_PROP_PRIVATE = -1
} mac_prop_id_t;
--- a/usr/src/uts/intel/Makefile.intel.shared Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/intel/Makefile.intel.shared Wed Apr 14 10:26:18 2010 -0700
@@ -496,7 +496,7 @@
#
# InfiniBand pseudo drivers
#
-DRV_KMODS += ib ibd rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
+DRV_KMODS += ib ibp rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
#
# LVM modules
--- a/usr/src/uts/intel/ibd/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-#
-
-#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE = ../..
-
-#
-# Define the module and object file sets.
-#
-MODULE = ibd
-OBJECTS = $(IBD_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/ibd
-LDFLAGS += -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip
-WARLOCK_OUT = $(IBD_OBJS:%.o=%.ll)
-WARLOCK_OK = $(MODULE).ok
-WLCMD_DIR = $(UTSBASE)/common/io/warlock
-#
-# Include common rules.
-#
-include $(UTSBASE)/intel/Makefile.intel
-
-#
-# Define targets
-#
-ALL_TARGET = $(BINARY) $(CONFMOD)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-
-#
-# Default build targets.
-#
-.KEEP_STATE:
-
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
- $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-clobber: $(CLOBBER_DEPS)
- $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
-install: $(INSTALL_DEPS)
-
-#
-# Include common targets.
-#
-include $(UTSBASE)/intel/Makefile.targ
-
-#
-# Defines for local commands.
-#
-WARLOCK = warlock
-WLCC = wlcc
-TOUCH = touch
-TEST = test
-
-warlock: $(WARLOCK_OK)
-
-$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
- $(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
- -l ../warlock/ddi_dki_impl.ll
- $(TOUCH) $@
-
-%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
- $(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
- $(WLCC) $(CPPFLAGS) -DDEBUG -o $@ $<
-
-warlock_ddi.files:
- @cd ../warlock; pwd; $(MAKE) warlock
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/ibp/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,112 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ibp
+OBJECTS = $(IBD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/ibd
+LDFLAGS += -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip \
+ -Nmisc/dls -Nmisc/dld
+WARLOCK_OUT = $(IBD_OBJS:%.o=%.ll)
+WARLOCK_OK = $(MODULE).ok
+WLCMD_DIR = $(UTSBASE)/common/io/warlock
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+ $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+clobber: $(CLOBBER_DEPS)
+ $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Defines for local commands.
+#
+WARLOCK = warlock
+WLCC = wlcc
+TOUCH = touch
+TEST = test
+
+warlock: $(WARLOCK_OK)
+
+$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
+ $(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
+ -l ../warlock/ddi_dki_impl.ll
+ $(TOUCH) $@
+
+%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
+ $(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
+ $(WLCC) $(CPPFLAGS) -DDEBUG -o $@ $<
+
+warlock_ddi.files:
+ @cd ../warlock; pwd; $(MAKE) warlock
--- a/usr/src/uts/sparc/Makefile.sparc.shared Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/sparc/Makefile.sparc.shared Wed Apr 14 10:26:18 2010 -0700
@@ -281,7 +281,7 @@
DRV_KMODS += usbecm
DRV_KMODS += hci1394 av1394 scsa1394 dcam1394
DRV_KMODS += sbp2
-DRV_KMODS += ib ibd rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
+DRV_KMODS += ib ibp rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
DRV_KMODS += pci_pci pcieb pcieb_bcm
DRV_KMODS += i8042 kb8042 mouse8042
DRV_KMODS += fcode
--- a/usr/src/uts/sparc/ibd/Makefile Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-#
-
-#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE = ../..
-
-#
-# Define the module and object file sets.
-#
-MODULE = ibd
-OBJECTS = $(IBD_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
-CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/ibd
-LDFLAGS += -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip
-WARLOCK_OUT = $(IBD_OBJS:%.o=%.ll)
-WARLOCK_OK = $(MODULE).ok
-WLCMD_DIR = $(UTSBASE)/common/io/warlock
-#
-# Include common rules.
-#
-include $(UTSBASE)/sparc/Makefile.sparc
-
-#
-# Define targets
-#
-ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-#
-# Overrides
-#
-ALL_BUILDS = $(ALL_BUILDSONLY64)
-DEF_BUILDS = $(DEF_BUILDSONLY64)
-CLEANLINTFILES += $(LINT32_FILES)
-
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-
-#
-# Default build targets.
-#
-.KEEP_STATE:
-
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
- $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-clobber: $(CLOBBER_DEPS)
- $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS) lint32
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
-install: $(INSTALL_DEPS)
-
-#
-# Include common targets.
-#
-include $(UTSBASE)/sparc/Makefile.targ
-
-#
-# Defines for local commands.
-#
-WARLOCK = warlock
-WLCC = wlcc
-TOUCH = touch
-TEST = test
-
-warlock: $(WARLOCK_OK) $(WARLOCK_OUT)
-
-$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
- $(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
- -l ../warlock/ddi_dki_impl.ll
- $(TOUCH) $@
-
-%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
- $(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
- $(WLCC) $(CPPFLAGS) -DDEBUG -o $@ $<
-
-warlock_ddi.files:
- @cd ../warlock; pwd; $(MAKE) warlock
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/ibp/Makefile Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,124 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ibp
+OBJECTS = $(IBD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/ib/clients/ibd
+LDFLAGS += -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip \
+ -Nmisc/dls -Nmisc/dld
+WARLOCK_OUT = $(IBD_OBJS:%.o=%.ll)
+WARLOCK_OK = $(MODULE).ok
+WLCMD_DIR = $(UTSBASE)/common/io/warlock
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Overrides
+#
+ALL_BUILDS = $(ALL_BUILDSONLY64)
+DEF_BUILDS = $(DEF_BUILDSONLY64)
+CLEANLINTFILES += $(LINT32_FILES)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+ $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+clobber: $(CLOBBER_DEPS)
+ $(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS) lint32
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
+
+#
+# Defines for local commands.
+#
+WARLOCK = warlock
+WLCC = wlcc
+TOUCH = touch
+TEST = test
+
+warlock: $(WARLOCK_OK) $(WARLOCK_OUT)
+
+$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
+ $(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
+ -l ../warlock/ddi_dki_impl.ll
+ $(TOUCH) $@
+
+%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
+ $(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
+ $(WLCC) $(CPPFLAGS) -DDEBUG -o $@ $<
+
+warlock_ddi.files:
+ @cd ../warlock; pwd; $(MAKE) warlock