6961039 /dev/vt: race in symlink updates can result in kernel heap corruption
authorJerry Gilliam <Jerry.Gilliam@Sun.COM>
Mon, 28 Jun 2010 14:29:09 -0700
changeset 12717 1cb0b42b763e
parent 12716 e5de7a3edad2
child 12718 5d58bbec4ff6
6961039 /dev/vt: race in symlink updates can result in kernel heap corruption
usr/src/uts/common/fs/dev/sdev_subr.c
usr/src/uts/common/fs/dev/sdev_vtops.c
--- a/usr/src/uts/common/fs/dev/sdev_subr.c	Mon Jun 28 13:50:20 2010 -0700
+++ b/usr/src/uts/common/fs/dev/sdev_subr.c	Mon Jun 28 14:29:09 2010 -0700
@@ -2573,6 +2573,15 @@
 
 		/*
 		 * Check validity of node
+		 * Drop invalid and nodes to be skipped.
+		 * A node the validator indicates as stale needs
+		 * to be returned as presumably the node name itself
+		 * is valid and the node data itself will be refreshed
+		 * on lookup.  An application performing a readdir then
+		 * stat on each entry should thus always see consistent
+		 * data.  In any case, it is not possible to synchronize
+		 * with dynamic kernel state, and any view we return can
+		 * never be anything more than a snapshot at a point in time.
 		 */
 		if (vtor) {
 			switch (vtor(dv)) {
@@ -2581,6 +2590,10 @@
 			case SDEV_VTOR_INVALID:
 			case SDEV_VTOR_SKIP:
 				continue;
+			case SDEV_VTOR_STALE:
+				sdcmn_err3(("sdev_readir: %s stale\n",
+				    dv->sdev_name));
+				break;
 			default:
 				cmn_err(CE_PANIC,
 				    "dev fs: validator failed: %s(%p)\n",
--- a/usr/src/uts/common/fs/dev/sdev_vtops.c	Mon Jun 28 13:50:20 2010 -0700
+++ b/usr/src/uts/common/fs/dev/sdev_vtops.c	Mon Jun 28 14:29:09 2010 -0700
@@ -88,15 +88,22 @@
 	return (0);
 }
 
-/*ARGSUSED*/
+/*
+ * Validate that a node is up-to-date and correct.
+ * A validator may not update the node state or
+ * contents as a read lock permits entry by
+ * multiple threads.
+ */
 int
 devvt_validate(struct sdev_node *dv)
 {
 	minor_t min;
 	char *nm = dv->sdev_name;
+	int rval;
 
 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
 	ASSERT(dv->sdev_state == SDEV_READY);
+	ASSERT(RW_LOCK_HELD(&(dv->sdev_dotdot)->sdev_contents));
 
 	/* validate only READY nodes */
 	if (dv->sdev_state != SDEV_READY) {
@@ -110,28 +117,20 @@
 
 	if (strcmp(nm, DEVVT_ACTIVE_NAME) == 0) {
 		char *link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-
 		(void) vt_getactive(link, MAXPATHLEN);
-		if (strcmp(link, dv->sdev_symlink) != 0) {
-			strfree(dv->sdev_symlink);
-			dv->sdev_symlink = strdup(link);
-			dv->sdev_attr->va_size = strlen(link);
-		}
+		rval = (strcmp(link, dv->sdev_symlink) == 0) ?
+		    SDEV_VTOR_VALID : SDEV_VTOR_STALE;
 		kmem_free(link, MAXPATHLEN);
-		return (SDEV_VTOR_VALID);
+		return (rval);
 	}
 
 	if (strcmp(nm, DEVVT_CONSUSER_NAME) == 0) {
 		char *link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-
 		(void) vt_getconsuser(link, MAXPATHLEN);
-		if (strcmp(link, dv->sdev_symlink) != 0) {
-			strfree(dv->sdev_symlink);
-			dv->sdev_symlink = strdup(link);
-			dv->sdev_attr->va_size = strlen(link);
-		}
+		rval = (strcmp(link, dv->sdev_symlink) == 0) ?
+		    SDEV_VTOR_VALID : SDEV_VTOR_STALE;
 		kmem_free(link, MAXPATHLEN);
-		return (SDEV_VTOR_VALID);
+		return (rval);
 	}
 
 	if (devvt_str2minor(nm, &min) != 0) {
@@ -238,6 +237,8 @@
 	major_t maj;
 	minor_t min;
 
+	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+
 	if ((maj = vt_wc_attached()) == (major_t)-1)
 		return;
 
@@ -279,6 +280,36 @@
 }
 
 static void
+devvt_rebuild_stale_link(struct sdev_node *ddv, struct sdev_node *dv)
+{
+	char *link;
+
+	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+
+	ASSERT((strcmp(dv->sdev_name, DEVVT_ACTIVE_NAME) == 0) ||
+	    (strcmp(dv->sdev_name, DEVVT_CONSUSER_NAME) == 0));
+
+	link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+	if (strcmp(dv->sdev_name, DEVVT_ACTIVE_NAME) == 0) {
+		(void) vt_getactive(link, MAXPATHLEN);
+	} else if (strcmp(dv->sdev_name, DEVVT_CONSUSER_NAME) == 0) {
+		(void) vt_getconsuser(link, MAXPATHLEN);
+	}
+
+	if (strcmp(link, dv->sdev_symlink) != 0) {
+		strfree(dv->sdev_symlink);
+		dv->sdev_symlink = strdup(link);
+		dv->sdev_attr->va_size = strlen(link);
+	}
+	kmem_free(link, MAXPATHLEN);
+}
+
+/*
+ * First step in refreshing directory contents.
+ * Remove each invalid entry and rebuild the link
+ * reference for each stale entry.
+ */
+static void
 devvt_prunedir(struct sdev_node *ddv)
 {
 	struct vnode *vp;
@@ -293,31 +324,24 @@
 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
 		next = SDEV_NEXT_ENTRY(ddv, dv);
 
-		/* skip stale nodes */
-		if (dv->sdev_flags & SDEV_STALE)
-			continue;
-
-		/* validate and prune only ready nodes */
-		if (dv->sdev_state != SDEV_READY)
-			continue;
-
 		switch (vtor(dv)) {
 		case SDEV_VTOR_VALID:
+			break;
 		case SDEV_VTOR_SKIP:
-			continue;
+			break;
 		case SDEV_VTOR_INVALID:
+			vp = SDEVTOV(dv);
+			if (vp->v_count != 0)
+				break;
+			/* remove the cached node */
+			SDEV_HOLD(dv);
+			(void) sdev_cache_update(ddv, &dv,
+			    dv->sdev_name, SDEV_CACHE_DELETE);
+			break;
 		case SDEV_VTOR_STALE:
-			sdcmn_err7(("destroy invalid "
-			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
+			devvt_rebuild_stale_link(ddv, dv);
 			break;
 		}
-		vp = SDEVTOV(dv);
-		if (vp->v_count > 0)
-			continue;
-		SDEV_HOLD(dv);
-		/* remove the cache node */
-		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
-		    SDEV_CACHE_DELETE);
 	}
 }
 
@@ -343,7 +367,10 @@
 	rw_enter(&sdvp->sdev_contents, RW_WRITER);
 #endif
 
-	/* 1. create missed nodes */
+	/* 1.  prune invalid nodes and rebuild stale symlinks */
+	devvt_prunedir(sdvp);
+
+	/* 2. create missing nodes */
 	for (min = 0; min < cnt; min++) {
 		char nm[16];
 
@@ -358,7 +385,7 @@
 			/* skip stale nodes */
 			if (dv->sdev_flags & SDEV_STALE)
 				continue;
-			/* validate and prune only ready nodes */
+			/* validate only ready nodes */
 			if (dv->sdev_state != SDEV_READY)
 				continue;
 			if (strcmp(nm, dv->sdev_name) == 0) {
@@ -371,7 +398,7 @@
 		}
 	}
 
-	/* 2. create active link node and console user link node */
+	/* 3. create active link node and console user link node */
 	found = 0;
 	for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {
 		next = SDEV_NEXT_ENTRY(sdvp, dv);
@@ -379,7 +406,7 @@
 		/* skip stale nodes */
 		if (dv->sdev_flags & SDEV_STALE)
 			continue;
-		/* validate and prune only ready nodes */
+		/* validate only ready nodes */
 		if (dv->sdev_state != SDEV_READY)
 			continue;
 		if ((strcmp(dv->sdev_name, DEVVT_ACTIVE_NAME) == NULL))
@@ -392,13 +419,9 @@
 	}
 	if (!(found & 0x01))
 		devvt_create_snode(sdvp, DEVVT_ACTIVE_NAME, cred, SDEV_VLINK);
-
 	if (!(found & 0x02))
 		devvt_create_snode(sdvp, DEVVT_CONSUSER_NAME, cred, SDEV_VLINK);
 
-	/* 3. cleanup invalid nodes */
-	devvt_prunedir(sdvp);
-
 #ifndef	__lock_lint
 	rw_downgrade(&sdvp->sdev_contents);
 #else