6735704 deadlock in topo_node_facility()
authorRobert Johnston <Robert.Johnston@Sun.COM>
Fri, 12 Sep 2008 17:16:21 -0700
changeset 7585 6bd9ced8f8a6
parent 7584 07b5a7770d5b
child 7586 82575dc6f82c
6735704 deadlock in topo_node_facility() 6735691 deadlock in topo_node_facbind()
usr/src/cmd/fm/fmtopo/common/fmtopo.c
usr/src/lib/fm/topo/libtopo/common/topo_node.c
usr/src/lib/fm/topo/libtopo/common/topo_snap.c
usr/src/lib/fm/topo/libtopo/common/topo_tree.h
--- a/usr/src/cmd/fm/fmtopo/common/fmtopo.c	Fri Sep 12 16:58:22 2008 -0700
+++ b/usr/src/cmd/fm/fmtopo/common/fmtopo.c	Fri Sep 12 17:16:21 2008 -0700
@@ -24,7 +24,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <sys/fm/protocol.h>
 #include <fm/libtopo.h>
@@ -50,12 +49,11 @@
 
 static const char *opt_R = "/";
 static const char *opt_s = FM_FMRI_SCHEME_HC;
-static const char optstr[] = "bCdeEP:pR:s:StVx";
+static const char optstr[] = "bCdeP:pR:s:StVx";
 
 static int opt_b = 0;
 static int opt_d = 0;
 static int opt_e = 0;
-static int opt_E = 0;
 static int opt_p = 0;
 static int opt_S = 0;
 static int opt_t = 0;
@@ -77,7 +75,7 @@
 usage(FILE *fp)
 {
 	(void) fprintf(fp,
-	    "Usage: %s [-bCeEdpSVx] [-P group.property[=type:value]] "
+	    "Usage: %s [-bCedpSVx] [-P group.property[=type:value]] "
 	    "[-R root] [-s scheme] [fmri]\n", g_pname);
 
 	(void) fprintf(fp,
@@ -85,7 +83,6 @@
 	    "\t-C  dump core after completing execution\n"
 	    "\t-d  set debug mode for libtopo modules\n"
 	    "\t-e  display FMRIs as paths using esc/eft notation\n"
-	    "\t-E  enumerate sensor nodes\n"
 	    "\t-P  get/set specified properties\n"
 	    "\t-p  display of FMRI protocol properties\n"
 	    "\t-R  set root directory for libtopo plug-ins and other files\n"
@@ -799,7 +796,7 @@
 {
 	int err;
 	nvlist_t *nvl;
-	nvlist_t *rsrc, *out;
+	nvlist_t *rsrc;
 	char *s;
 
 	if (opt_e && strcmp(opt_s, FM_FMRI_SCHEME_HC) == 0) {
@@ -830,24 +827,6 @@
 	topo_hdl_strfree(thp, s);
 	nvlist_free(rsrc);
 
-	/*
-	 * If the "-E" option was specified, we want to also enumerate any
-	 * available facility nodes.  To do that we check if the node supports
-	 * a facility enumerator method.  If it exists, then we invoke it to
-	 * enumerate the sensors for this node.
-	 */
-	if (opt_E) {
-		if (topo_method_supported(node, TOPO_METH_FAC_ENUM, 0))
-			if (topo_method_invoke(node, TOPO_METH_FAC_ENUM, 0,
-			    NULL, &out, &err) != 0) {
-				(void) fprintf(stderr,
-				    "topo_method_invoke failed (%s) on node "
-				    "%s=%d\n", topo_strerror(err),
-				    topo_node_name(node),
-				    topo_node_instance(node));
-			}
-	}
-
 	if (opt_V || opt_all) {
 		if ((nvl = topo_prop_getprops(node, &err)) == NULL) {
 			(void) fprintf(stderr, "%s: failed to get "
@@ -1196,9 +1175,6 @@
 			case 'e':
 				opt_e++;
 				break;
-			case 'E':
-				opt_E++;
-				break;
 			case 'P':
 				pcnt++;
 				break;
--- a/usr/src/lib/fm/topo/libtopo/common/topo_node.c	Fri Sep 12 16:58:22 2008 -0700
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_node.c	Fri Sep 12 17:16:21 2008 -0700
@@ -419,9 +419,11 @@
 }
 
 static tnode_t *
-node_bind_seterror(topo_mod_t *mod, tnode_t *pnode, tnode_t *node, int err)
+node_bind_seterror(topo_mod_t *mod, tnode_t *pnode, tnode_t *node,
+    boolean_t pnode_locked, int err)
 {
-	topo_node_unlock(pnode);
+	if (pnode_locked)
+		topo_node_unlock(pnode);
 
 	(void) topo_mod_seterrno(mod, err);
 
@@ -454,12 +456,12 @@
 			if (inst > nhp->th_range.tr_max ||
 			    inst < nhp->th_range.tr_min)
 				return (node_bind_seterror(mod, pnode, NULL,
-				    EMOD_NODE_RANGE));
+				    B_TRUE, EMOD_NODE_RANGE));
 
 			h = topo_node_hash(nhp, inst);
 			if (nhp->th_nodearr[h] != NULL)
 				return (node_bind_seterror(mod, pnode, NULL,
-				    EMOD_NODE_BOUND));
+				    B_TRUE, EMOD_NODE_BOUND));
 			else
 				break;
 
@@ -467,10 +469,12 @@
 	}
 
 	if (nhp == NULL)
-		return (node_bind_seterror(mod, pnode, NULL, EMOD_NODE_NOENT));
+		return (node_bind_seterror(mod, pnode, NULL, B_TRUE,
+		    EMOD_NODE_NOENT));
 
 	if ((node = topo_mod_zalloc(mod, sizeof (tnode_t))) == NULL)
-		return (node_bind_seterror(mod, pnode, NULL, EMOD_NOMEM));
+		return (node_bind_seterror(mod, pnode, NULL, B_TRUE,
+		    EMOD_NOMEM));
 
 	(void) pthread_mutex_init(&node->tn_lock, NULL);
 
@@ -486,14 +490,15 @@
 	topo_mod_hold(mod);
 
 	if (fmri == NULL)
-		return (node_bind_seterror(mod, pnode, node, EMOD_NVL_INVAL));
+		return (node_bind_seterror(mod, pnode, node, B_TRUE,
+		    EMOD_NVL_INVAL));
 
 	if (topo_pgroup_create(node, &protocol_pgroup, &err) < 0)
-		return (node_bind_seterror(mod, pnode, node, err));
+		return (node_bind_seterror(mod, pnode, node, B_TRUE, err));
 
 	if (topo_prop_set_fmri(node, TOPO_PGROUP_PROTOCOL, TOPO_PROP_RESOURCE,
 	    TOPO_PROP_IMMUTABLE, fmri, &err) < 0)
-		return (node_bind_seterror(mod, pnode, node, err));
+		return (node_bind_seterror(mod, pnode, node, B_TRUE, err));
 
 	topo_dprintf(mod->tm_hdl, TOPO_DBG_MODSVC,
 	    "node bound %s=%d/%s=%d\n", topo_node_name(pnode),
@@ -535,31 +540,40 @@
 	if (topo_node_range_create(mod, pnode, name, 0, 0) < 0)
 		return (NULL);  /* mod errno set */
 
+	topo_node_hold(pnode);
 	topo_node_lock(pnode);
 	for (nhp = topo_list_next(&pnode->tn_children); nhp != NULL;
 	    nhp = topo_list_next(nhp)) {
 		if (strcmp(nhp->th_name, name) == 0) {
 
 			if (inst > nhp->th_range.tr_max ||
-			    inst < nhp->th_range.tr_min)
+			    inst < nhp->th_range.tr_min) {
+				topo_node_rele(pnode);
 				return (node_bind_seterror(mod, pnode, NULL,
-				    EMOD_NVL_INVAL));
-
+				    B_TRUE, EMOD_NVL_INVAL));
+			}
 			h = topo_node_hash(nhp, inst);
-			if (nhp->th_nodearr[h] != NULL)
+			if (nhp->th_nodearr[h] != NULL) {
+				topo_node_rele(pnode);
 				return (node_bind_seterror(mod, pnode, NULL,
-				    EMOD_NODE_BOUND));
-			else
+				    B_TRUE, EMOD_NODE_BOUND));
+			} else
 				break;
 
 		}
 	}
+	topo_node_unlock(pnode);
 
-	if (nhp == NULL)
-		return (node_bind_seterror(mod, pnode, NULL, EMOD_NODE_NOENT));
-
-	if ((node = topo_mod_zalloc(mod, sizeof (tnode_t))) == NULL)
-		return (node_bind_seterror(mod, pnode, NULL, EMOD_NOMEM));
+	if (nhp == NULL) {
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, NULL, B_FALSE,
+		    EMOD_NODE_NOENT));
+	}
+	if ((node = topo_mod_zalloc(mod, sizeof (tnode_t))) == NULL) {
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, NULL, B_FALSE,
+		    EMOD_NOMEM));
+	}
 
 	(void) pthread_mutex_init(&node->tn_lock, NULL);
 
@@ -575,26 +589,35 @@
 	/* Ref count module that bound this node */
 	topo_mod_hold(mod);
 
-	if (topo_pgroup_create(node, &protocol_pgroup, &err) < 0)
-		return (node_bind_seterror(mod, pnode, node, err));
-
-	if (topo_mod_nvalloc(mod, &fnvl, NV_UNIQUE_NAME) < 0)
-		return (node_bind_seterror(mod, pnode, node, EMOD_NOMEM));
+	if (topo_pgroup_create(node, &protocol_pgroup, &err) < 0) {
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node, B_FALSE, err));
+	}
+	if (topo_mod_nvalloc(mod, &fnvl, NV_UNIQUE_NAME) < 0) {
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node, B_FALSE,
+		    EMOD_NOMEM));
+	}
 	if (nvlist_add_string(fnvl, FM_FMRI_FACILITY_NAME, name) != 0 ||
 	    nvlist_add_string(fnvl, FM_FMRI_FACILITY_TYPE, type) != 0) {
 		nvlist_free(fnvl);
-		return (node_bind_seterror(mod, pnode, node,  EMOD_FMRI_NVL));
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node,  B_FALSE,
+		    EMOD_FMRI_NVL));
 	}
 
 	if (topo_node_resource(pnode, &pfmri, &err) < 0) {
 		nvlist_free(fnvl);
-		return (node_bind_seterror(mod, pnode, node, err));
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node, B_FALSE, err));
 	}
 
 	if (nvlist_add_nvlist(pfmri, FM_FMRI_FACILITY, fnvl) != 0) {
 		nvlist_free(fnvl);
 		nvlist_free(pfmri);
-		return (node_bind_seterror(mod, pnode, node,  EMOD_FMRI_NVL));
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node,  B_FALSE,
+		    EMOD_FMRI_NVL));
 	}
 
 	nvlist_free(fnvl);
@@ -602,7 +625,8 @@
 	if (topo_prop_set_fmri(node, TOPO_PGROUP_PROTOCOL, TOPO_PROP_RESOURCE,
 	    TOPO_PROP_IMMUTABLE, pfmri, &err) < 0) {
 		nvlist_free(pfmri);
-		return (node_bind_seterror(mod, pnode, node, err));
+		topo_node_rele(pnode);
+		return (node_bind_seterror(mod, pnode, node, B_FALSE, err));
 	}
 
 	nvlist_free(pfmri);
@@ -614,9 +638,11 @@
 
 	topo_node_hold(node);
 	nhp->th_nodearr[h] = node;
+
+	topo_node_lock(pnode);
 	++pnode->tn_refs;
-
 	topo_node_unlock(pnode);
+	topo_node_rele(pnode);
 
 	if (topo_pgroup_create(node, &auth_pgroup, &err) == 0) {
 		(void) topo_prop_inherit(node, FM_FMRI_AUTHORITY,
@@ -635,39 +661,12 @@
     uint32_t fac_subtype, topo_faclist_t *faclist, int *errp)
 {
 	tnode_t *tmp;
-	nvlist_t *rsrc, *fac, *out;
+	nvlist_t *rsrc, *fac;
 	char *tmp_factype;
 	uint32_t tmp_facsubtype;
 	boolean_t list_empty = 1;
 	topo_faclist_t *fac_ele;
 
-	/*
-	 * Some facility nodes will be statically enumerated in the xml maps.
-	 * Other may be enumerated on-demand.  We can check for/handle the
-	 * latter case by looking for a node method for doing facility
-	 * enumeration and invoking it, if found.
-	 *
-	 * If the TOPO_FACILITIES_BOUND flag is set on this node, then we've
-	 * already enumerated the facilties for this node (by a previous call
-	 * to topo_node_facility) so there's no need to invoke the enumeration
-	 * method.
-	 */
-	topo_node_lock(node);
-	if (!(node->tn_state & TOPO_FACILITIES_BOUND) &&
-	    topo_method_supported(node, TOPO_METH_FAC_ENUM, 0)) {
-
-		if (topo_method_invoke(node, TOPO_METH_FAC_ENUM, 0, NULL, &out,
-		    errp) != 0) {
-			topo_dprintf(thp, TOPO_DBG_ERR,
-			    "topo_method_invoke failed (%s) on node %s=%d\n",
-			    topo_strerror(*errp), topo_node_name(node),
-			    topo_node_instance(node));
-			topo_node_unlock(node);
-			return (-1);
-		} else
-			node->tn_state |= TOPO_FACILITIES_BOUND;
-	}
-
 	bzero(faclist, sizeof (topo_faclist_t));
 	for (tmp = topo_child_first(node); tmp != NULL;
 	    tmp = topo_child_next(node, tmp)) {
--- a/usr/src/lib/fm/topo/libtopo/common/topo_snap.c	Fri Sep 12 16:58:22 2008 -0700
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_snap.c	Fri Sep 12 17:16:21 2008 -0700
@@ -80,6 +80,7 @@
 #include <uuid/uuid.h>
 
 #include <fm/libtopo.h>
+#include <sys/fm/protocol.h>
 
 #include <topo_alloc.h>
 #include <topo_builtin.h>
@@ -321,19 +322,63 @@
 	return ((char *)uuid);
 }
 
+/*ARGSUSED*/
+static int
+fac_walker(topo_hdl_t *thp, tnode_t *node, void *arg)
+{
+	int err;
+	nvlist_t *out;
+
+	if (topo_method_supported(node, TOPO_METH_FAC_ENUM, 0)) {
+		/*
+		 * If the facility enumeration method fails, note the failure,
+		 * but continue on with the walk.
+		 */
+		if (topo_method_invoke(node, TOPO_METH_FAC_ENUM, 0, NULL, &out,
+		    &err) != 0) {
+			topo_dprintf(thp, TOPO_DBG_ERR,
+			    "facility enumeration method failed on node %s=%d "
+			    "(%s)\n", topo_node_name(node),
+			    topo_node_instance(node), topo_strerror(err));
+		}
+	}
+	return (TOPO_WALK_NEXT);
+}
+
 /*
  * Return snapshot id
  */
 char *
 topo_snap_hold(topo_hdl_t *thp, const char *uuid, int *errp)
 {
+	topo_walk_t *twp;
+
 	if (thp == NULL)
 		return (NULL);
 
-	if (uuid == NULL)
-		return (topo_snap_create(thp, errp));
-	else
-		return (topo_snap_log_create(thp, uuid, errp));
+	if (uuid == NULL) {
+		char *ret;
+
+		ret = topo_snap_create(thp, errp);
+
+		/*
+		 * Now walk the tree and invoke any facility enumeration methods
+		 */
+		if (ret != NULL) {
+			if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
+			    fac_walker, (void *)0, errp)) == NULL) {
+				return (ret);
+			}
+			if (topo_walk_step(twp, TOPO_WALK_CHILD)
+			    != TOPO_WALK_ERR) {
+				topo_walk_fini(twp);
+				return (ret);
+			}
+			topo_walk_fini(twp);
+		}
+		return (ret);
+	}
+	return (topo_snap_log_create(thp, uuid, errp));
 }
 
 /*ARGSUSED*/
--- a/usr/src/lib/fm/topo/libtopo/common/topo_tree.h	Fri Sep 12 16:58:22 2008 -0700
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_tree.h	Fri Sep 12 17:16:21 2008 -0700
@@ -76,12 +76,6 @@
 #define	TOPO_NODE_ROOT		0x0002
 #define	TOPO_NODE_BOUND		0x0004
 #define	TOPO_NODE_LINKED	0x0008
-/*
- * Used by topo_node_facility() to determine whether or not this node's
- * facilities have been enumerated.  This is to avoid unecessarily re-running
- * the facility enumeration method on this node (which can be expensive)
- */
-#define	TOPO_FACILITIES_BOUND	0x0010
 
 typedef struct topo_tree {
 	topo_list_t tt_list;		/* next/prev pointers */