usr/src/uts/common/fs/zfs/zio.c
changeset 13396 485224e23eb9
parent 13395 6aa29333ca54
child 13399 24f53bc2622e
--- a/usr/src/uts/common/fs/zfs/zio.c	Wed Oct 01 16:57:15 2008 +0100
+++ b/usr/src/uts/common/fs/zfs/zio.c	Mon Oct 06 14:09:06 2008 +0100
@@ -275,7 +275,7 @@
 
 /*
  * ==========================================================================
- * I/O transform callbacks for subblocks and decompression
+ * I/O transform callbacks for subblocks, decompression and decryption
  * ==========================================================================
  */
 static void
@@ -306,11 +306,10 @@
 	int crypt_error = 0;
 	boolean_t isdnode = B_FALSE;
 
-	if (spa_version(zio->io_spa) < SPA_VERSION_CRYPTO)
-		return;
-
+	ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO);
 	ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
 	ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT);
+
 	/*
 	 * These asserts are a problem for ZVOLS, remove them for now
 	 * but we should be able to assert at least the checksum case,
@@ -330,15 +329,14 @@
 
 	crypt_error = zio_decrypt_data(crypt, zio->io_spa,
 		    &zio->io_bookmark, bp->blk_birth, isdnode,
-		    data, size, &mac, zio->io_data, zio->io_size);
+		    zio->io_data, zio->io_size, &mac, data, size);
 
 	/*
 	 * One possible failure is not having access to the
 	 * key material that the zboookmark_t says we needed,
-	 * In that case we got ENOENT so post an FMA event, we do this
+	 * In that case we got EAGAIN, so post an FMA event; we do this
 	 * here so that we don't need to pass the full zio downwards into
-	 * the crypto functions. Then change the error to EAGAIN so the 
-	 * read can be retried.
+	 * the crypto functions.
 	 */ 
 	if (crypt_error != 0) {
 		if (crypt_error == EAGAIN) {
@@ -622,7 +620,6 @@
 	    ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
 	    ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
 
-	ASSERT(priority == ZIO_PRIORITY_LOG_WRITE && zp != NULL);
 	if (zp != NULL) {
 		zio->io_prop = *zp;
 	}
@@ -837,20 +834,21 @@
 	void *cbuf;
 
 	/*
+	 * XXX TO(re)DO
 	 * Don't add crypto to the pipeline if this is a scrub/resilver
 	 * because we don't need the actual data in that case and the
 	 * decrypt could fail due to a lack of a key (in which case the
 	 * datasets wouldn't be mounted but still need to be scrubbed).
 	 */
 
+	if ((spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) &&
+	    (crypt != ZIO_CRYPT_OFF && crypt != ZIO_CRYPT_INHERIT)) {
+		cbuf = zio_buf_alloc(csize);
+		zio_push_transform(zio, cbuf, csize, csize,
+		    zio_decrypt);
+	}
+
 	if (zio->io_logical == zio) {
-		if ((spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) &&
-		    (crypt != ZIO_CRYPT_OFF && crypt != ZIO_CRYPT_INHERIT)) {
-			cbuf = zio_buf_alloc(csize);
-			zio_push_transform(zio, cbuf, csize, csize,
-			    zio_decrypt);
-		}
-
 		if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
 			cbuf = zio_buf_alloc(csize);
 			zio_push_transform(zio, cbuf, csize, csize,
@@ -870,12 +868,16 @@
 	zio_prop_t *zp = &zio->io_prop;
 	int compress = zp->zp_compress;
 	int crypt = zp->zp_crypt;
+	int checksum = zp->zp_checksum;
+	int type = zp->zp_type;
 	blkptr_t *bp = zio->io_bp;
-	void *cbuf;
+	void *cbuf, *encbuf;
 	uint64_t lsize = zio->io_size;
 	uint64_t csize = lsize;
-	uint64_t cbufsize = 0;
+	uint64_t cbufsize = 0, encbufsize = 0;
 	int pass = 1;
+	zio_cksum_t *mac = NULL;
+	size_t maclen = 0;
 
 	/*
 	 * If our children haven't all reached the ready stage,
@@ -885,8 +887,9 @@
 	    zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
 		return (ZIO_PIPELINE_STOP);
 
-	if (!IO_IS_ALLOCATING(zio))
+	if (!IO_IS_ALLOCATING(zio) && (type != DMU_OT_INTENT_LOG)) {
 		return (ZIO_PIPELINE_CONTINUE);
+	}
 
 	ASSERT(compress != ZIO_COMPRESS_INHERIT);
 
@@ -927,14 +930,14 @@
 
 	if (spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO &&
 	    crypt != ZIO_CRYPT_OFF) {
-		zio_cksum_t mac;
-		boolean_t isdnode;
 		int crypt_error;
-		    
-		isdnode = (zio->io_bookmark.zb_object == 0) &&
-		    (zio->io_bookmark.zb_level == 0);
+		boolean_t isdnode = (type == DMU_OT_DNODE);
+
+		ASSERT(crypt != ZIO_CRYPT_INHERIT);
 
-		if (isdnode && zio->io_bookmark.zb_blkid == 0) {
+		if (isdnode && (zio->io_bookmark.zb_blkid == 0 ||
+		    zio->io_bookmark.zb_level != 0)) {
+			/* Meta dnode ? Something special about this dnode ? */
 			crypt = ZIO_CRYPT_OFF;
 			goto crypt_done;
 		}
@@ -942,33 +945,41 @@
 		crypt_error = zio_encrypt_data(crypt, zio->io_spa,
 		    &zio->io_bookmark, zio->io_txg, isdnode,
 		    zio->io_data, zio->io_size,
-		    &cbuf, &cbufsize, (void *)&mac);
+		    &encbuf, &encbufsize, (void **)&mac);
 
-		/*
-		 * If we don't have access to the key material that the 
-		 * zboookmark_t says we needed, post an FMA event.
-		 */ 
-		if (crypt_error == EAGAIN) {
-			zfs_ereport_post(FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL,
-			    zio->io_spa, zio->io_vd, zio, 0, 0);
-		}
 		if (crypt_error != 0) {
+			if (crypt_error == EAGAIN) {
+				/*
+				 * If we don't have access to the key material
+				 * that the zbookmark_t says we needed,
+				 * post an FMA event.
+				 */ 
+				zfs_ereport_post(
+				    FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL,
+				    zio->io_spa, zio->io_vd, zio, 0, 0);
+			}
 			zio->io_error = crypt_error;
-			goto crypt_done;
-		} else {
-			/*
-			 * The mac is stored in the blkptr 
-			 * as the top two words of the checksum.
-			 */
-			ASSERT3U(zio_crypt_table[crypt].ci_maclen, ==, 16);
-			bp->blk_cksum.zc_word[2] = BE_64(mac.zc_word[0]);
-			bp->blk_cksum.zc_word[3] = BE_64(mac.zc_word[1]);
-			zio_push_transform(zio, cbuf, csize, cbufsize, NULL);
+			return (ZIO_PIPELINE_STOP);
 		}
+		ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+		zio_push_transform(zio, encbuf, zio->io_size, encbufsize, NULL);
 	}
 crypt_done:
 
 	/*
+	 * If we aren't encrypting, make sure the checksum isn't the
+	 * truncated SHA256+MAC variant - force it to SHA256 instead.
+	 */
+	if (checksum == ZIO_CHECKSUM_SHA256_CCM_MAC &&
+	    (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)) {
+		ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO);
+		checksum = ZIO_CHECKSUM_SHA256;
+	}
+
+	if (type == DMU_OT_INTENT_LOG)
+		return (ZIO_PIPELINE_CONTINUE);
+
+	/*
 	 * The final pass of spa_sync() must be all rewrites, but the first
 	 * few passes offer a trade-off: allocating blocks defers convergence,
 	 * but newly allocated blocks are sequential, so they can be written
@@ -990,15 +1001,43 @@
 	if (csize == 0) {
 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 	} else {
-		ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
+		ASSERT(checksum != ZIO_CHECKSUM_GANG_HEADER);
 		BP_SET_LSIZE(bp, lsize);
 		BP_SET_PSIZE(bp, csize);
 		BP_SET_COMPRESS(bp, compress);
 		BP_SET_CRYPT(bp, crypt);
-		BP_SET_CHECKSUM(bp, zp->zp_checksum);
+		BP_SET_CHECKSUM(bp, checksum);
 		BP_SET_TYPE(bp, zp->zp_type);
 		BP_SET_LEVEL(bp, zp->zp_level);
 		BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
+		if (mac != NULL) {
+			/*
+			 * The MAC is stored in the blkptr as the top two
+			 * words of the checksum, in big-endian form
+			 * (same as the checksum).
+			 *
+			 * Currently all MACs are 16 bytes and all
+			 * crypto "on" values use a MAC.  If an encryption
+			 * mode is added that doesn't have a MAC or has a MAC
+			 * of a different size, this needs updating.
+			 */
+			maclen = zio_crypt_table[crypt].ci_maclen;
+
+			ASSERT3U(maclen, ==, 16);
+			ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+			ASSERT3U(crypt, >, ZIO_CRYPT_INHERIT);
+			ASSERT3U(crypt, <=, ZIO_CRYPT_FUNCTIONS);
+			ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
+			ASSERT3U(spa_version(zio->io_spa), >=,
+			    SPA_VERSION_CRYPTO);
+
+			bp->blk_cksum.zc_word[2] = BE_64(mac->zc_word[0]);
+			bp->blk_cksum.zc_word[3] = BE_64(mac->zc_word[1]);
+			kmem_free(mac, maclen);
+		} else {
+			ASSERT3U(checksum, !=, ZIO_CHECKSUM_SHA256_CCM_MAC);
+		}
+
 	}
 
 	return (ZIO_PIPELINE_CONTINUE);
@@ -1801,6 +1840,7 @@
 		BP_SET_PSIZE(new_bp, size);
 		BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
 		BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG);
+		BP_SET_CRYPT(new_bp, ZIO_CRYPT_OFF);
 		BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG);
 		BP_SET_LEVEL(new_bp, 0);
 		BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);