usr/src/uts/common/fs/zfs/zio.c
changeset 13413 7d0fac22dbb9
parent 13411 004347840a93
child 13414 cac89b33d897
--- a/usr/src/uts/common/fs/zfs/zio.c	Tue Oct 21 10:17:52 2008 +0100
+++ b/usr/src/uts/common/fs/zfs/zio.c	Thu Oct 30 18:03:47 2008 +0000
@@ -33,11 +33,10 @@
 #include <sys/zio_compress.h>
 #include <sys/zio_crypt.h>
 #include <sys/zio_checksum.h>
+#include <sys/zil.h>
 
 #include <sys/sdt.h>
 
-#define BLK_PAD_IS_MAC	1	
-
 /*
  * ==========================================================================
  * I/O priority table
@@ -306,30 +305,24 @@
 	int crypt = BP_GET_CRYPT(bp);
 	uint64_t mac[2];
 	int crypt_error = 0;
-	boolean_t isdnode = B_FALSE;
 
 	ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO);
 	ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
 	ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT);
-#if BLK_PAD_IS_MAC
+
 	if (type == DMU_OT_INTENT_LOG) {
-		ASSERT3U(BP_GET_CHECKSUM(bp), ==, ZIO_CHECKSUM_ZILOG);
+		zil_trailer_t *ztp;
+		ztp = (zil_trailer_t *)((char *)zio->io_data
+		    + zio->io_size) - 1;
+		mac[0] = BE_64(ztp->zit_mac);
+		mac[1] = 0;
 	} else {
-		ASSERT3U(BP_GET_CHECKSUM(bp), ==, ZIO_CHECKSUM_SHA256);
+		ASSERT3U(16, ==, zio_crypt_table[crypt].ci_maclen);
+		/* MAC is in the blkptr as the top two words of the checksum */
+		ASSERT3U(BP_GET_CHECKSUM(bp), ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+		mac[0] = BE_64(bp->blk_cksum.zc_word[2]);
+		mac[1] = BE_64(bp->blk_cksum.zc_word[3]);
 	}
-	mac[0] = BE_64(bp->blk_pad[0]);
-	mac[1] = BE_64(bp->blk_pad[1]);
-#else
-	ASSERT3U(BP_GET_CHECKSUM(bp), ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
-	ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
-
-	/* MAC is stored in the blkptr as the top two words of the checksum */
-	ASSERT3U(16, ==, zio_crypt_table[crypt].ci_maclen);
-
-	mac[0] = BE_64(bp->blk_cksum.zc_word[2]);
-	mac[1] = BE_64(bp->blk_cksum.zc_word[3]);
-#endif
-
 	crypt_error = zio_decrypt_data(crypt, zio->io_spa,
 	    &zio->io_bookmark, bp->blk_birth, type,
 	    zio->io_data, zio->io_size, &mac, data, size);
@@ -347,7 +340,7 @@
 			 * clear text parts to be passed on.
 			 */
 #ifdef _KERNEL
-			if (type == DMU_OT_DNODE) {
+			if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) {
 #endif
 				DTRACE_PROBE2(zio__ciphertext__dnode,
 				    blkptr_t *, bp, zbookmark_t *,
@@ -665,6 +658,9 @@
 	 *
 	 * All claims *must* be resolved in the first txg -- before the SPA
 	 * starts allocating blocks -- so that nothing is allocated twice.
+	 *
+	 * This means that, for encrypted datasets, the claim happens while
+	 * the key is *not* present.
 	 */
 	ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
 	ASSERT3U(spa_first_txg(spa), <=, txg);
@@ -839,9 +835,6 @@
 	}
 
 	/*
-	 * XXX meta-dnode issue ?
-	 * Should this be guarded with the io_logical like above ?
-	 *
 	 * Don't add crypto to the transform stack if this is a scrub/resilver
 	 * because we don't need the actual data in that case and the
 	 * decrypt could fail due to a lack of a key (in which case the
@@ -856,15 +849,26 @@
 	 * as this is often very early in boot.  That should be okay because
 	 * zil_claim only needs to walk the log chain it doesn't need the
 	 * record contents.
-	 *
-	 * XXX Need to add a ZIO_FLAG_ZILOG_REPLAY flag for when we
-	 * add the "normal" SCRUB exclusion back in again.
 	 */
 	if (BP_GET_CRYPT(bp) != ZIO_CRYPT_OFF &&
-	    spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO &&
-	    !(zio->io_flags & ZIO_FLAG_ZILOG_CHECK)) {
-		cbuf = zio_buf_alloc(csize);
-		zio_push_transform(zio, cbuf, csize, csize, zio_decrypt);
+	    BP_GET_CRYPT(bp) != ZIO_CRYPT_INHERIT &&
+	    spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) {
+		boolean_t decrypt = B_TRUE;
+
+		if (zio->io_flags & ZIO_FLAG_SCRUB ||
+		    zio->io_flags & ZIO_FLAG_RESILVER) {
+			decrypt = B_FALSE;
+		}
+
+		if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+			decrypt = zio->io_flags & ZIO_FLAG_ZILOG_REPLAY;
+		}
+
+		if (decrypt) {
+			cbuf = zio_buf_alloc(csize);
+			zio_push_transform(zio, cbuf, csize, csize,
+			    zio_decrypt);
+		}
 	}
 
 	if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
@@ -881,14 +885,13 @@
 	int crypt = zp->zp_crypt;
 	int checksum = zp->zp_checksum;
 	int type = zp->zp_type;
-	zbookmark_t bookmark = zio->io_bookmark;
 	blkptr_t *bp = zio->io_bp;
 	void *cbuf, *encbuf;
 	uint64_t lsize = zio->io_size;
 	uint64_t csize = lsize;
 	uint64_t cbufsize = 0, encbufsize = 0;
 	int pass = 1;
-	zio_cksum_t *mac = NULL;
+	uint64_t *mac = NULL;
 	size_t maclen = 0;
 
 	/*
@@ -957,11 +960,14 @@
 			if (crypt_error == EAGAIN) {
 				/*
 				 * If we don't have access to the key material
-				 * that the * zboookmark_t says we needed,
+				 * that the zbookmark_t says we needed,
 				 * post an FMA event.
 				 *
-				 * XXX do we need some sort of cap on
-				 * the number of times we try this ?
+				 * This case really shouldn't happen in
+				 * practice, though, because datasets are
+				 * unmounted when the key is removed and are
+				 * not mounted if the key isn't available at
+				 * import or on an explicit zfs mount.
 				 */
 				zfs_ereport_post(
 				    FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL,
@@ -969,11 +975,33 @@
 				zio->io_error = EIO;
 			}
 			zio->io_error = crypt_error;
-			return (ZIO_PIPELINE_STOP);
+			return (ZIO_PIPELINE_CONTINUE);
+		}
+
+		if (type == DMU_OT_INTENT_LOG) {
+			zil_trailer_t *ztp;
+			BP_SET_CRYPT(bp, crypt);
+			/*
+			 * For the intent log the MAC is 8 bytes,
+			 * not 16 as in all the other cases.
+			 * It is stored in the zil_trailer_t.
+			 */
+			ztp = (zil_trailer_t *)((char *)encbuf +
+			    encbufsize) - 1;
+			ztp->zit_mac = BE_64(mac[0]);
+			kmem_free(mac, ZIL_CCM_MAC_LEN);
+			mac = NULL;
+		} else {
+			maclen = zio_crypt_table[crypt].ci_maclen;
+			checksum = ZIO_CHECKSUM_SHA256_CCM_MAC;
 		}
 		zio_push_transform(zio, encbuf, zio->io_size, encbufsize, NULL);
-		checksum = ZIO_CHECKSUM_SHA256_CCM_MAC;
-		maclen = zio_crypt_table[crypt].ci_maclen;
+	}
+
+	if (!IO_IS_ALLOCATING(zio)) {
+		/* Must have dealt with the crypto mac if there was one */
+		ASSERT(mac == NULL);
+		return (ZIO_PIPELINE_CONTINUE);
 	}
 
 	/*
@@ -986,11 +1014,6 @@
 		checksum = ZIO_CHECKSUM_SHA256;
 	}
 
-	if (!IO_IS_ALLOCATING(zio)) {
-		BP_SET_CRYPT(bp, crypt);
-		return (ZIO_PIPELINE_CONTINUE);
-	}
-
 	ASSERT(checksum != ZIO_CHECKSUM_INHERIT);
 	ASSERT(compress != ZIO_COMPRESS_INHERIT);
 	ASSERT(spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO &&
@@ -1038,23 +1061,17 @@
 			 * mode is added that doesn't have a MAC or has a MAC
 			 * of a different size this needs updating.
 			 */
-			ASSERT3U(maclen, ==, 16);
-			ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+			ASSERT(maclen == 16);
 			ASSERT3U(crypt, >, ZIO_CRYPT_INHERIT);
 			ASSERT3U(crypt, <=, ZIO_CRYPT_FUNCTIONS);
 			ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
 			ASSERT3U(spa_version(zio->io_spa), >=,
 			    SPA_VERSION_CRYPTO);
-
-#if BLK_PAD_IS_MAC
-			bp->blk_pad[0] = BE_64(mac->zc_word[0]);
-			bp->blk_pad[1] = BE_64(mac->zc_word[1]);
-			BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_SHA256);
-#else
-			bp->blk_cksum.zc_word[2] = BE_64(mac->zc_word[0]);
-			bp->blk_cksum.zc_word[3] = BE_64(mac->zc_word[1]);
-#endif
+			ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+			bp->blk_cksum.zc_word[2] = BE_64(mac[0]);
+			bp->blk_cksum.zc_word[3] = BE_64(mac[1]);
 			kmem_free(mac, maclen);
+			mac = NULL;
 		} else {
 			ASSERT3U(checksum, !=, ZIO_CHECKSUM_SHA256_CCM_MAC);
 		}
@@ -1845,7 +1862,7 @@
  */
 int
 zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t txg)
+    uint64_t txg, int crypt)
 {
 	int error;
 
@@ -1860,8 +1877,8 @@
 		BP_SET_LSIZE(new_bp, size);
 		BP_SET_PSIZE(new_bp, size);
 		BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
+		BP_SET_CRYPT(new_bp, crypt);
 		BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG);
-		BP_SET_CRYPT(new_bp, ZIO_CRYPT_OFF);
 		BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG);
 		BP_SET_LEVEL(new_bp, 0);
 		BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);
@@ -2293,10 +2310,8 @@
 			ASSERT(zio->io_children[c][w] == 0);
 
 	if (bp != NULL) {
-#ifndef BLK_PAD_IS_MAC
 		ASSERT(bp->blk_pad[0] == 0);
 		ASSERT(bp->blk_pad[1] == 0);
-#endif
 		ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||
 		    (pio != NULL && bp == pio->io_bp));
 		if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) &&