6624956 zfs_log_fuid_ids can cause panic on sparc
authormarks
Thu, 08 Nov 2007 08:07:56 -0800
changeset 5435 1be0be66916d
parent 5434 d0b14f9f9750
child 5436 e2b8f7b9f1c5
6624956 zfs_log_fuid_ids can cause panic on sparc 6626168 zfs can incorrectly log uids for failed mappings 6626566 zfs ace splitting not working quite right
usr/src/uts/common/fs/zfs/sys/zil.h
usr/src/uts/common/fs/zfs/zfs_acl.c
usr/src/uts/common/fs/zfs/zfs_fuid.c
usr/src/uts/common/fs/zfs/zfs_log.c
usr/src/uts/common/fs/zfs/zfs_replay.c
--- a/usr/src/uts/common/fs/zfs/sys/zil.h	Thu Nov 08 06:34:22 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h	Thu Nov 08 08:07:56 2007 -0800
@@ -105,6 +105,14 @@
 #define	ZIL_XVAT_SIZE(mapsize) \
 	sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \
 	(sizeof (uint64_t) * 7)
+
+/*
+ * Size of ACL in log.  The ACE data is padded out to properly align
+ * on 8 byte boundary.
+ */
+
+#define	ZIL_ACE_LENGTH(x)	(roundup(x, sizeof (uint64_t)))
+
 /*
  * Intent log transaction types and record structures
  */
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c	Thu Nov 08 06:34:22 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c	Thu Nov 08 08:07:56 2007 -0800
@@ -1365,7 +1365,7 @@
 zfs_acl_split_ace(zfs_acl_t *aclp, zfs_ace_hdr_t *acep)
 {
 	zfs_acl_node_t *aclnode;
-	zfs_acl_node_t *currnode = zfs_acl_curr_node(aclp);
+	zfs_acl_node_t *currnode;
 	void  *newacep;
 	uint16_t type, flags;
 	uint32_t mask;
@@ -1387,6 +1387,8 @@
 	aclp->z_next_ace = acep;
 	flags &= ~ALL_INHERIT;
 	aclp->z_ops.ace_flags_set(acep, flags);
+	currnode = zfs_acl_curr_node(aclp);
+	ASSERT(currnode->z_ace_idx >= 1);
 	currnode->z_ace_idx -= 1;
 }
 
--- a/usr/src/uts/common/fs/zfs/zfs_fuid.c	Thu Nov 08 06:34:22 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_fuid.c	Thu Nov 08 08:07:56 2007 -0800
@@ -217,6 +217,16 @@
 	fuid_domain_t searchnode, *findnode;
 	avl_index_t loc;
 
+	/*
+	 * If the dummy "nobody" domain then return an index of 0
+	 * to cause the created FUID to be a standard POSIX id
+	 * for the user nobody.
+	 */
+	if (domain[0] == '\0') {
+		*retdomain = "";
+		return (0);
+	}
+
 	searchnode.f_ksid = ksid_lookupdomain(domain);
 	if (retdomain) {
 		*retdomain = searchnode.f_ksid->kd_name;
@@ -482,13 +492,7 @@
 }
 
 /*
- * Create a file system FUID
- *
- * During a replay operation the id will be incorrect and
- * will be ignored.  In this case replay must be true and the
- * cred will have a ksid_t attached to it.
- *
- * A mapped uid/gid would have a ksid_t attached to the cred.
+ * Create a file system FUID, based on information in the users cred
  */
 uint64_t
 zfs_fuid_create_cred(zfsvfs_t *zfsvfs, uint64_t id,
@@ -525,6 +529,10 @@
  * we can't find the domain + rid information in the
  * cred.  Instead we have to query Winchester for the
  * domain and rid.
+ *
+ * During replay operations the domain+rid information is
+ * found in the zfs_fuid_info_t that the replay code has
+ * attached to the zfsvfs of the file system.
  */
 uint64_t
 zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id,
@@ -582,8 +590,15 @@
 		else
 			status = kidmap_getsidbygid(id, &domain, &rid);
 
-		if (status != 0)
-			return (UID_NOBODY);
+		if (status != 0) {
+			/*
+			 * When returning nobody we will need to
+			 * make a dummy fuid table entry for logging
+			 * purposes.
+			 */
+			rid = UID_NOBODY;
+			domain = "";
+		}
 
 	}
 
@@ -719,6 +734,5 @@
 	 * Not found in ksidlist, check posix groups
 	 */
 	zfs_fuid_map_id(zfsvfs, id, ZFS_GROUP, &gid);
-
 	return (groupmember(gid, cr));
 }
--- a/usr/src/uts/common/fs/zfs/zfs_log.c	Thu Nov 08 06:34:22 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c	Thu Nov 08 08:07:56 2007 -0800
@@ -255,7 +255,7 @@
 		aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0;
 		txsize =
 		    sizeof (lr_acl_create_t) + namesize + fuidsz +
-		    aclsize + xvatsize;
+		    ZIL_ACE_LENGTH(aclsize) + xvatsize;
 		lrsize = sizeof (lr_acl_create_t);
 	}
 
@@ -304,7 +304,7 @@
 			lracl->lr_acl_flags = 0;
 
 		bcopy(vsecp->vsa_aclentp, end, aclsize);
-		end = (caddr_t)end + aclsize;
+		end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
 	}
 
 	/* drop in FUID info */
@@ -648,7 +648,9 @@
 	if (zilog == NULL || zp->z_unlinked)
 		return;
 
-	txsize = lrsize + aclbytes + (fuidp ? fuidp->z_domain_str_sz : 0) +
+	txsize = lrsize +
+	    ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
+	    (fuidp ? fuidp->z_domain_str_sz : 0) +
 	    sizeof (uint64) * (fuidp ? fuidp->z_fuid_cnt : 0);
 
 	itx = zil_itx_create(txtype, txsize);
@@ -674,7 +676,7 @@
 
 		bcopy(vsecp->vsa_aclentp, start, aclbytes);
 
-		start = (caddr_t)start + aclbytes;
+		start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
 
 		if (fuidp) {
 			start = zfs_log_fuid_ids(fuidp, start);
--- a/usr/src/uts/common/fs/zfs/zfs_replay.c	Thu Nov 08 06:34:22 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_replay.c	Thu Nov 08 08:07:56 2007 -0800
@@ -82,7 +82,9 @@
 	xoptattr_t *xoap = NULL;
 	uint64_t *attrs;
 	uint64_t *crtime;
+	uint32_t *bitmap;
 	void *scanstamp;
+	int i;
 
 	xvap->xva_vattr.va_mask |= AT_XVATTR;
 	if ((xoap = xva_getxoptattr(xvap)) == NULL) {
@@ -91,8 +93,11 @@
 	}
 
 	ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize);
-	bcopy(&lrattr->lr_attr_bitmap, xvap->xva_reqattrmap,
-	    xvap->xva_mapsize);
+
+	bitmap = &lrattr->lr_attr_bitmap;
+	for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++)
+		xvap->xva_reqattrmap[i] = *bitmap;
+
 	attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1);
 	crtime = attrs + 1;
 	scanstamp = (caddr_t)(crtime + 2);
@@ -241,9 +246,10 @@
 	/* swap the lr_attr structure */
 	byteswap_uint32_array(lrattr, sizeof (*lrattr));
 	/* swap the bitmap */
-	byteswap_uint32_array(lrattr + 1, lrattr->lr_attr_masksize - 1);
+	byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) *
+	    sizeof (uint32_t));
 	/* swap the attributes, create time + 64 bit word for attributes */
-	byteswap_uint64_array(lrattr + (sizeof (uint32_t) *
+	byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) *
 	    (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t));
 }
 
@@ -274,7 +280,7 @@
 		txtype = (int)lr->lr_common.lrc_txtype;
 		if (txtype == TX_CREATE_ACL_ATTR ||
 		    txtype == TX_MKDIR_ACL_ATTR) {
-			lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
+			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
 			zfs_replay_swap_attrs(lrattr);
 			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
 		}
@@ -284,7 +290,8 @@
 		/* swap fuids */
 		if (lracl->lr_fuidcnt) {
 			byteswap_uint64_array((caddr_t)aclstart +
-			    lracl->lr_acl_bytes, sizeof (uint64_t));
+			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
+			    lracl->lr_fuidcnt * sizeof (uint64_t));
 		}
 	}
 
@@ -314,7 +321,8 @@
 	switch ((int)lr->lr_common.lrc_txtype) {
 	case TX_CREATE_ACL:
 		aclstart = (caddr_t)(lracl + 1);
-		fuidstart = (caddr_t)aclstart + lracl->lr_acl_bytes;
+		fuidstart = (caddr_t)aclstart +
+		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
 		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
 		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
 		    lr->lr_uid, lr->lr_gid);
@@ -333,7 +341,7 @@
 		vsec.vsa_aclflags = lracl->lr_acl_flags;
 		if (zfsvfs->z_fuid_replay == NULL)
 			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
-			    lracl->lr_acl_bytes;
+			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
 			zfsvfs->z_fuid_replay =
 			    zfs_replay_fuids(fuidstart,
 			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
@@ -344,7 +352,8 @@
 		break;
 	case TX_MKDIR_ACL:
 		aclstart = (caddr_t)(lracl + 1);
-		fuidstart = (caddr_t)aclstart + lracl->lr_acl_bytes;
+		fuidstart = (caddr_t)aclstart +
+		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
 		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
 		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
 		    lr->lr_uid, lr->lr_gid);
@@ -362,7 +371,7 @@
 		vsec.vsa_aclflags = lracl->lr_acl_flags;
 		if (zfsvfs->z_fuid_replay == NULL)
 			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
-			    lracl->lr_acl_bytes;
+			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
 			zfsvfs->z_fuid_replay =
 			    zfs_replay_fuids(fuidstart,
 			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
@@ -740,6 +749,11 @@
 	znode_t *zp;
 	int error;
 
+	if (byteswap) {
+		byteswap_uint64_array(lr, sizeof (*lr));
+		zfs_oldace_byteswap(ace, lr->lr_aclcnt);
+	}
+
 	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
 		/*
 		 * As we can log acls out of order, it's possible the
@@ -751,11 +765,6 @@
 		return (error);
 	}
 
-	if (byteswap) {
-		byteswap_uint64_array(lr, sizeof (*lr));
-		zfs_oldace_byteswap(ace, lr->lr_aclcnt);
-	}
-
 	bzero(&vsa, sizeof (vsa));
 	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
 	vsa.vsa_aclcnt = lr->lr_aclcnt;
@@ -790,6 +799,16 @@
 	znode_t *zp;
 	int error;
 
+	if (byteswap) {
+		byteswap_uint64_array(lr, sizeof (*lr));
+		zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
+		if (lr->lr_fuidcnt) {
+			byteswap_uint64_array((caddr_t)ace +
+			    ZIL_ACE_LENGTH(lr->lr_acl_bytes),
+			    lr->lr_fuidcnt * sizeof (uint64_t));
+		}
+	}
+
 	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
 		/*
 		 * As we can log acls out of order, it's possible the
@@ -801,15 +820,6 @@
 		return (error);
 	}
 
-	if (byteswap) {
-		byteswap_uint64_array(lr, sizeof (*lr));
-		zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
-		if (lr->lr_fuidcnt) {
-			byteswap_uint64_array((caddr_t)ace + lr->lr_acl_bytes,
-			    lr->lr_fuidcnt * sizeof (uint64_t));
-		}
-	}
-
 	bzero(&vsa, sizeof (vsa));
 	vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
 	vsa.vsa_aclcnt = lr->lr_aclcnt;
@@ -818,7 +828,8 @@
 	vsa.vsa_aclflags = lr->lr_acl_flags;
 
 	if (lr->lr_fuidcnt) {
-		void *fuidstart = (caddr_t)ace + lr->lr_acl_bytes;
+		void *fuidstart = (caddr_t)ace +
+		    ZIL_ACE_LENGTH(lr->lr_acl_bytes);
 
 		zfsvfs->z_fuid_replay =
 		    zfs_replay_fuids(fuidstart, &fuidstart,