--- a/usr/src/uts/common/fs/zfs/zio.c Wed Oct 01 16:57:15 2008 +0100
+++ b/usr/src/uts/common/fs/zfs/zio.c Mon Oct 06 14:09:06 2008 +0100
@@ -275,7 +275,7 @@
/*
* ==========================================================================
- * I/O transform callbacks for subblocks and decompression
+ * I/O transform callbacks for subblocks, decompression and decryption
* ==========================================================================
*/
static void
@@ -306,11 +306,10 @@
int crypt_error = 0;
boolean_t isdnode = B_FALSE;
- if (spa_version(zio->io_spa) < SPA_VERSION_CRYPTO)
- return;
-
+ ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO);
ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT);
+
/*
* These asserts are a problem for ZVOLS, remove them for now
* but we should be able to assert at least the checksum case,
@@ -330,15 +329,14 @@
crypt_error = zio_decrypt_data(crypt, zio->io_spa,
&zio->io_bookmark, bp->blk_birth, isdnode,
- data, size, &mac, zio->io_data, zio->io_size);
+ zio->io_data, zio->io_size, &mac, data, size);
/*
* One possible failure is not having access to the
* key material that the zboookmark_t says we needed,
- * In that case we got ENOENT so post an FMA event, we do this
+ * In that case we get EAGAIN, so post an FMA event; we do this
* here so that we don't need to pass the full zio downwards into
- * the crypto functions. Then change the error to EAGAIN so the
- * read can be retried.
+ * the crypto functions.
*/
if (crypt_error != 0) {
if (crypt_error == EAGAIN) {
@@ -622,7 +620,6 @@
ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
- ASSERT(priority == ZIO_PRIORITY_LOG_WRITE && zp != NULL);
if (zp != NULL) {
zio->io_prop = *zp;
}
@@ -837,20 +834,21 @@
void *cbuf;
/*
+ * XXX TO(re)DO
* Don't add crypto to the pipeline if this is a scrub/resilver
* because we don't need the actual data in that case and the
* decrypt could fail due to a lack of a key (in which case the
* datasets wouldn't be mounted but still need to be scrubbed).
*/
+ if ((spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) &&
+ (crypt != ZIO_CRYPT_OFF && crypt != ZIO_CRYPT_INHERIT)) {
+ cbuf = zio_buf_alloc(csize);
+ zio_push_transform(zio, cbuf, csize, csize,
+ zio_decrypt);
+ }
+
if (zio->io_logical == zio) {
- if ((spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) &&
- (crypt != ZIO_CRYPT_OFF && crypt != ZIO_CRYPT_INHERIT)) {
- cbuf = zio_buf_alloc(csize);
- zio_push_transform(zio, cbuf, csize, csize,
- zio_decrypt);
- }
-
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
cbuf = zio_buf_alloc(csize);
zio_push_transform(zio, cbuf, csize, csize,
@@ -870,12 +868,16 @@
zio_prop_t *zp = &zio->io_prop;
int compress = zp->zp_compress;
int crypt = zp->zp_crypt;
+ int checksum = zp->zp_checksum;
+ int type = zp->zp_type;
blkptr_t *bp = zio->io_bp;
- void *cbuf;
+ void *cbuf, *encbuf;
uint64_t lsize = zio->io_size;
uint64_t csize = lsize;
- uint64_t cbufsize = 0;
+ uint64_t cbufsize = 0, encbufsize = 0;
int pass = 1;
+ zio_cksum_t *mac = NULL;
+ size_t maclen = 0;
/*
* If our children haven't all reached the ready stage,
@@ -885,8 +887,9 @@
zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
return (ZIO_PIPELINE_STOP);
- if (!IO_IS_ALLOCATING(zio))
+ if (!IO_IS_ALLOCATING(zio) && (type != DMU_OT_INTENT_LOG)) {
return (ZIO_PIPELINE_CONTINUE);
+ }
ASSERT(compress != ZIO_COMPRESS_INHERIT);
@@ -927,14 +930,14 @@
if (spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO &&
crypt != ZIO_CRYPT_OFF) {
- zio_cksum_t mac;
- boolean_t isdnode;
int crypt_error;
-
- isdnode = (zio->io_bookmark.zb_object == 0) &&
- (zio->io_bookmark.zb_level == 0);
+ boolean_t isdnode = (type == DMU_OT_DNODE);
+
+ ASSERT(crypt != ZIO_CRYPT_INHERIT);
- if (isdnode && zio->io_bookmark.zb_blkid == 0) {
+ if (isdnode && (zio->io_bookmark.zb_blkid == 0 ||
+ zio->io_bookmark.zb_level != 0)) {
+ /* Meta dnode? Something special about this dnode? */
crypt = ZIO_CRYPT_OFF;
goto crypt_done;
}
@@ -942,33 +945,41 @@
crypt_error = zio_encrypt_data(crypt, zio->io_spa,
&zio->io_bookmark, zio->io_txg, isdnode,
zio->io_data, zio->io_size,
- &cbuf, &cbufsize, (void *)&mac);
+ &encbuf, &encbufsize, (void **)&mac);
- /*
- * If we don't have access to the key material that the
- * zboookmark_t says we needed, post an FMA event.
- */
- if (crypt_error == EAGAIN) {
- zfs_ereport_post(FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL,
- zio->io_spa, zio->io_vd, zio, 0, 0);
- }
if (crypt_error != 0) {
+ if (crypt_error == EAGAIN) {
+ /*
+ * If we don't have access to the key material
+ * that the zbookmark_t says we needed,
+ * post an FMA event.
+ */
+ zfs_ereport_post(
+ FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL,
+ zio->io_spa, zio->io_vd, zio, 0, 0);
+ }
zio->io_error = crypt_error;
- goto crypt_done;
- } else {
- /*
- * The mac is stored in the blkptr
- * as the top two words of the checksum.
- */
- ASSERT3U(zio_crypt_table[crypt].ci_maclen, ==, 16);
- bp->blk_cksum.zc_word[2] = BE_64(mac.zc_word[0]);
- bp->blk_cksum.zc_word[3] = BE_64(mac.zc_word[1]);
- zio_push_transform(zio, cbuf, csize, cbufsize, NULL);
+ return (ZIO_PIPELINE_STOP);
}
+ ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+ zio_push_transform(zio, encbuf, zio->io_size, encbufsize, NULL);
}
crypt_done:
/*
+ * If we aren't encrypting make sure the checksum isn't the
+ * truncated SHA256+MAC variant - force to SHA256 instead.
+ */
+ if (checksum == ZIO_CHECKSUM_SHA256_CCM_MAC &&
+ (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)) {
+ ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO);
+ checksum = ZIO_CHECKSUM_SHA256;
+ }
+
+ if (type == DMU_OT_INTENT_LOG)
+ return (ZIO_PIPELINE_CONTINUE);
+
+ /*
* The final pass of spa_sync() must be all rewrites, but the first
* few passes offer a trade-off: allocating blocks defers convergence,
* but newly allocated blocks are sequential, so they can be written
@@ -990,15 +1001,43 @@
if (csize == 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
} else {
- ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
+ ASSERT(checksum != ZIO_CHECKSUM_GANG_HEADER);
BP_SET_LSIZE(bp, lsize);
BP_SET_PSIZE(bp, csize);
BP_SET_COMPRESS(bp, compress);
BP_SET_CRYPT(bp, crypt);
- BP_SET_CHECKSUM(bp, zp->zp_checksum);
+ BP_SET_CHECKSUM(bp, checksum);
BP_SET_TYPE(bp, zp->zp_type);
BP_SET_LEVEL(bp, zp->zp_level);
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
+ if (mac != NULL) {
+ /*
+ * The mac is stored in the blkptr as the top two
+ * words of the checksum, in bigendian form
+ * (same as the checksum).
+ *
+ * Currently all MAC's are 16 bytes and all
+ * crypto "on" values use a MAC. If an encryption
+ * mode is added that doesn't have a MAC or has a MAC
+ * of a different size this needs updating.
+ */
+ maclen = zio_crypt_table[crypt].ci_maclen;
+
+ ASSERT3U(maclen, ==, 16);
+ ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_CCM_MAC);
+ ASSERT3U(crypt, >, ZIO_CRYPT_INHERIT);
+ ASSERT3U(crypt, <=, ZIO_CRYPT_FUNCTIONS);
+ ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
+ ASSERT3U(spa_version(zio->io_spa), >=,
+ SPA_VERSION_CRYPTO);
+
+ bp->blk_cksum.zc_word[2] = BE_64(mac->zc_word[0]);
+ bp->blk_cksum.zc_word[3] = BE_64(mac->zc_word[1]);
+ kmem_free(mac, maclen);
+ } else {
+ ASSERT3U(checksum, !=, ZIO_CHECKSUM_SHA256_CCM_MAC);
+ }
+
}
return (ZIO_PIPELINE_CONTINUE);
@@ -1801,6 +1840,7 @@
BP_SET_PSIZE(new_bp, size);
BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG);
+ BP_SET_CRYPT(new_bp, ZIO_CRYPT_OFF);
BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG);
BP_SET_LEVEL(new_bp, 0);
BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);