2703 add mechanism to report ZFS send progress
authorBill Pijewski <wdp@joyent.com>
Wed, 09 May 2012 15:05:14 -0700
changeset 13686 4bc0783f6064
parent 13685 cdfded691c93
child 13687 72ce76fa37fb
2703 add mechanism to report ZFS send progress Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Robert Mustacchi <[email protected]> Reviewed by: Richard Lowe <[email protected]> Approved by: Eric Schrock <[email protected]>
usr/src/cmd/truss/codes.c
usr/src/cmd/zfs/zfs_main.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/lib/libzpool/common/sys/zfs_context.h
usr/src/man/man1m/zfs.1m
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_impl.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/truss/codes.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/cmd/truss/codes.c	Wed May 09 15:05:14 2012 -0700
@@ -23,6 +23,7 @@
  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
@@ -1256,6 +1257,8 @@
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_POOL_REOPEN,		"ZFS_IOC_POOL_REOPEN",
 		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_SEND_PROGRESS,	"ZFS_IOC_SEND_PROGRESS",
+		"zfs_cmd_t" },
 
 	/* kssl ioctls */
 	{ (uint_t)KSSL_ADD_ENTRY,		"KSSL_ADD_ENTRY",
--- a/usr/src/cmd/zfs/zfs_main.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/cmd/zfs/zfs_main.c	Wed May 09 15:05:14 2012 -0700
@@ -24,6 +24,7 @@
  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #include <assert.h>
@@ -3525,6 +3526,7 @@
 			if (flags.verbose)
 				extraverbose = B_TRUE;
 			flags.verbose = B_TRUE;
+			flags.progress = B_TRUE;
 			break;
 		case 'D':
 			flags.dedup = B_TRUE;
--- a/usr/src/lib/libzfs/common/libzfs.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs.h	Wed May 09 15:05:14 2012 -0700
@@ -23,6 +23,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -557,6 +558,9 @@
 
 	/* parsable verbose output (ie. -P) */
 	boolean_t parsable;
+
+	/* show progress (ie. -v) */
+	boolean_t progress;
 } sendflags_t;
 
 typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Wed May 09 15:05:14 2012 -0700
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #include <assert.h>
@@ -37,6 +38,7 @@
 #include <sys/mount.h>
 #include <pthread.h>
 #include <umem.h>
+#include <time.h>
 
 #include <libzfs.h>
 
@@ -62,6 +64,12 @@
 	libzfs_handle_t  *dedup_hdl;
 } dedup_arg_t;
 
+typedef struct progress_arg {
+	zfs_handle_t *pa_zhp;
+	int pa_fd;
+	boolean_t pa_parsable;
+} progress_arg_t;
+
 typedef struct dataref {
 	uint64_t ref_guid;
 	uint64_t ref_object;
@@ -781,7 +789,7 @@
 	char prevsnap[ZFS_MAXNAMELEN];
 	uint64_t prevsnap_obj;
 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
-	boolean_t verbose, dryrun, parsable;
+	boolean_t verbose, dryrun, parsable, progress;
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
@@ -973,10 +981,60 @@
 	return (error);
 }
 
+static void *
+send_progress_thread(void *arg)
+{
+	progress_arg_t *pa = arg;
+
+	zfs_cmd_t zc = { 0 };
+	zfs_handle_t *zhp = pa->pa_zhp;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	unsigned long long bytes;
+	char buf[16];
+
+	time_t t;
+	struct tm *tm;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (!pa->pa_parsable)
+		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
+
+	/*
+	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
+	 */
+	for (;;) {
+		(void) sleep(1);
+
+		zc.zc_cookie = pa->pa_fd;
+		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
+			return ((void *)-1);
+
+		(void) time(&t);
+		tm = localtime(&t);
+		bytes = zc.zc_cookie;
+
+		if (pa->pa_parsable) {
+			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
+			    tm->tm_hour, tm->tm_min, tm->tm_sec,
+			    bytes, zhp->zfs_name);
+		} else {
+			zfs_nicenum(bytes, buf, sizeof (buf));
+			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
+			    tm->tm_hour, tm->tm_min, tm->tm_sec,
+			    buf, zhp->zfs_name);
+		}
+	}
+}
+
 static int
 dump_snapshot(zfs_handle_t *zhp, void *arg)
 {
 	send_dump_data_t *sdd = arg;
+	progress_arg_t pa = { 0 };
+	pthread_t tid;
+
 	char *thissnap;
 	int err;
 	boolean_t isfromsnap, istosnap, fromorigin;
@@ -1094,8 +1152,29 @@
 	}
 
 	if (!sdd->dryrun) {
+		/*
+		 * If progress reporting is requested, spawn a new thread to
+		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
+		 */
+		if (sdd->progress) {
+			pa.pa_zhp = zhp;
+			pa.pa_fd = sdd->outfd;
+			pa.pa_parsable = sdd->parsable;
+
+			if (err = pthread_create(&tid, NULL,
+			    send_progress_thread, &pa)) {
+				zfs_close(zhp);
+				return (err);
+			}
+		}
+
 		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
 		    fromorigin, sdd->outfd, sdd->debugnv);
+
+		if (sdd->progress) {
+			(void) pthread_cancel(tid);
+			(void) pthread_join(tid, NULL);
+		}
 	}
 
 	(void) strcpy(sdd->prevsnap, thissnap);
@@ -1445,12 +1524,13 @@
 	sdd.fsavl = fsavl;
 	sdd.verbose = flags->verbose;
 	sdd.parsable = flags->parsable;
+	sdd.progress = flags->progress;
 	sdd.dryrun = flags->dryrun;
 	sdd.filter_cb = filter_func;
 	sdd.filter_cb_arg = cb_arg;
 	if (debugnvp)
 		sdd.debugnv = *debugnvp;
-	if (holdsnaps) {
+	if (holdsnaps || flags->progress) {
 		++holdseq;
 		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
 		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h	Wed May 09 15:05:14 2012 -0700
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_CONTEXT_H
@@ -214,6 +215,7 @@
 };
 
 extern struct proc p0;
+#define	curproc		(&p0)
 
 #define	PS_NONE		-1
 
--- a/usr/src/man/man1m/zfs.1m	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/man/man1m/zfs.1m	Wed May 09 15:05:14 2012 -0700
@@ -2,6 +2,7 @@
 .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright (c) 2012 by Delphix. All rights reserved.
 .\" Copyright (c) 2012 Nexenta Systems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, Joyent, Inc. All rights reserved.
 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
 .\"  See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with
 .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -2845,7 +2846,8 @@
 .ad
 .sp .6
 .RS 4n
-Print verbose information about the stream package generated.
+Print verbose information about the stream package generated.  This information
+includes a per-second report of how much data has been sent.
 .RE
 
 The format of the stream is committed. You will be able to receive your streams
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Wed May 09 15:05:14 2012 -0700
@@ -25,6 +25,7 @@
 /*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -54,48 +55,30 @@
 
 static char *dmu_recv_tag = "dmu_recv_tag";
 
-/*
- * The list of data whose inclusion in a send stream can be pending from
- * one call to backup_cb to another.  Multiple calls to dump_free() and
- * dump_freeobjects() can be aggregated into a single DRR_FREE or
- * DRR_FREEOBJECTS replay record.
- */
-typedef enum {
-	PENDING_NONE,
-	PENDING_FREE,
-	PENDING_FREEOBJECTS
-} pendop_t;
-
-struct backuparg {
-	dmu_replay_record_t *drr;
-	vnode_t *vp;
-	offset_t *off;
-	objset_t *os;
-	zio_cksum_t zc;
-	uint64_t toguid;
-	int err;
-	pendop_t pending_op;
-};
-
 static int
-dump_bytes(struct backuparg *ba, void *buf, int len)
+dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
 {
+	dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
 	ssize_t resid; /* have to get resid to get detailed errno */
 	ASSERT3U(len % 8, ==, 0);
 
-	fletcher_4_incremental_native(buf, len, &ba->zc);
-	ba->err = vn_rdwr(UIO_WRITE, ba->vp,
+	fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
+	dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
 	    (caddr_t)buf, len,
 	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
-	*ba->off += len;
-	return (ba->err);
+
+	mutex_enter(&ds->ds_sendstream_lock);
+	*dsp->dsa_off += len;
+	mutex_exit(&ds->ds_sendstream_lock);
+
+	return (dsp->dsa_err);
 }
 
 static int
-dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
+dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
 {
-	struct drr_free *drrf = &(ba->drr->drr_u.drr_free);
+	struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
 
 	/*
 	 * If there is a pending op, but it's not PENDING_FREE, push it out,
@@ -104,13 +87,15 @@
 	 * other DRR_FREE records.  DRR_FREEOBJECTS records can only be
 	 * aggregated with other DRR_FREEOBJECTS records.
 	 */
-	if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE &&
+	    dsp->dsa_pending_op != PENDING_FREE) {
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
-		ba->pending_op = PENDING_NONE;
+		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
-	if (ba->pending_op == PENDING_FREE) {
+	if (dsp->dsa_pending_op == PENDING_FREE) {
 		/*
 		 * There should never be a PENDING_FREE if length is -1
 		 * (because dump_dnode is the only place where this
@@ -128,34 +113,35 @@
 			return (0);
 		} else {
 			/* not a continuation.  Push out pending record */
-			if (dump_bytes(ba, ba->drr,
+			if (dump_bytes(dsp, dsp->dsa_drr,
 			    sizeof (dmu_replay_record_t)) != 0)
 				return (EINTR);
-			ba->pending_op = PENDING_NONE;
+			dsp->dsa_pending_op = PENDING_NONE;
 		}
 	}
 	/* create a FREE record and make it pending */
-	bzero(ba->drr, sizeof (dmu_replay_record_t));
-	ba->drr->drr_type = DRR_FREE;
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_FREE;
 	drrf->drr_object = object;
 	drrf->drr_offset = offset;
 	drrf->drr_length = length;
-	drrf->drr_toguid = ba->toguid;
+	drrf->drr_toguid = dsp->dsa_toguid;
 	if (length == -1ULL) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
 	} else {
-		ba->pending_op = PENDING_FREE;
+		dsp->dsa_pending_op = PENDING_FREE;
 	}
 
 	return (0);
 }
 
 static int
-dump_data(struct backuparg *ba, dmu_object_type_t type,
+dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
     uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
 {
-	struct drr_write *drrw = &(ba->drr->drr_u.drr_write);
+	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
 
 
 	/*
@@ -164,19 +150,20 @@
 	 * the stream, since aggregation can't be done across operations
 	 * of different types.
 	 */
-	if (ba->pending_op != PENDING_NONE) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE) {
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
-		ba->pending_op = PENDING_NONE;
+		dsp->dsa_pending_op = PENDING_NONE;
 	}
 	/* write a DATA record */
-	bzero(ba->drr, sizeof (dmu_replay_record_t));
-	ba->drr->drr_type = DRR_WRITE;
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_WRITE;
 	drrw->drr_object = object;
 	drrw->drr_type = type;
 	drrw->drr_offset = offset;
 	drrw->drr_length = blksz;
-	drrw->drr_toguid = ba->toguid;
+	drrw->drr_toguid = dsp->dsa_toguid;
 	drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
 	if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
 		drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
@@ -185,42 +172,43 @@
 	DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
 	drrw->drr_key.ddk_cksum = bp->blk_cksum;
 
-	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
 		return (EINTR);
-	if (dump_bytes(ba, data, blksz) != 0)
+	if (dump_bytes(dsp, data, blksz) != 0)
 		return (EINTR);
 	return (0);
 }
 
 static int
-dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data)
+dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
 {
-	struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill);
+	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
 
-	if (ba->pending_op != PENDING_NONE) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE) {
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
-		ba->pending_op = PENDING_NONE;
+		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
 	/* write a SPILL record */
-	bzero(ba->drr, sizeof (dmu_replay_record_t));
-	ba->drr->drr_type = DRR_SPILL;
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_SPILL;
 	drrs->drr_object = object;
 	drrs->drr_length = blksz;
-	drrs->drr_toguid = ba->toguid;
+	drrs->drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
+	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
 		return (EINTR);
-	if (dump_bytes(ba, data, blksz))
+	if (dump_bytes(dsp, data, blksz))
 		return (EINTR);
 	return (0);
 }
 
 static int
-dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
+dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 {
-	struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects);
+	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
 
 	/*
 	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
@@ -229,13 +217,14 @@
 	 * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
 	 * can only be aggregated with other DRR_FREEOBJECTS records.
 	 */
-	if (ba->pending_op != PENDING_NONE &&
-	    ba->pending_op != PENDING_FREEOBJECTS) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE &&
+	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
-		ba->pending_op = PENDING_NONE;
+		dsp->dsa_pending_op = PENDING_NONE;
 	}
-	if (ba->pending_op == PENDING_FREEOBJECTS) {
+	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
 		/*
 		 * See whether this free object array can be aggregated
 		 * with pending one
@@ -245,42 +234,43 @@
 			return (0);
 		} else {
 			/* can't be aggregated.  Push out pending record */
-			if (dump_bytes(ba, ba->drr,
+			if (dump_bytes(dsp, dsp->dsa_drr,
 			    sizeof (dmu_replay_record_t)) != 0)
 				return (EINTR);
-			ba->pending_op = PENDING_NONE;
+			dsp->dsa_pending_op = PENDING_NONE;
 		}
 	}
 
 	/* write a FREEOBJECTS record */
-	bzero(ba->drr, sizeof (dmu_replay_record_t));
-	ba->drr->drr_type = DRR_FREEOBJECTS;
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
 	drrfo->drr_firstobj = firstobj;
 	drrfo->drr_numobjs = numobjs;
-	drrfo->drr_toguid = ba->toguid;
+	drrfo->drr_toguid = dsp->dsa_toguid;
 
-	ba->pending_op = PENDING_FREEOBJECTS;
+	dsp->dsa_pending_op = PENDING_FREEOBJECTS;
 
 	return (0);
 }
 
 static int
-dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
+dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
 {
-	struct drr_object *drro = &(ba->drr->drr_u.drr_object);
+	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
 
 	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
-		return (dump_freeobjects(ba, object, 1));
+		return (dump_freeobjects(dsp, object, 1));
 
-	if (ba->pending_op != PENDING_NONE) {
-		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE) {
+		if (dump_bytes(dsp, dsp->dsa_drr,
+		    sizeof (dmu_replay_record_t)) != 0)
 			return (EINTR);
-		ba->pending_op = PENDING_NONE;
+		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
 	/* write an OBJECT record */
-	bzero(ba->drr, sizeof (dmu_replay_record_t));
-	ba->drr->drr_type = DRR_OBJECT;
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_OBJECT;
 	drro->drr_object = object;
 	drro->drr_type = dnp->dn_type;
 	drro->drr_bonustype = dnp->dn_bonustype;
@@ -288,19 +278,19 @@
 	drro->drr_bonuslen = dnp->dn_bonuslen;
 	drro->drr_checksumtype = dnp->dn_checksum;
 	drro->drr_compress = dnp->dn_compress;
-	drro->drr_toguid = ba->toguid;
+	drro->drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
 		return (EINTR);
 
-	if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
 		return (EINTR);
 
 	/* free anything past the end of the file */
-	if (dump_free(ba, object, (dnp->dn_maxblkid + 1) *
+	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
 	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
 		return (EINTR);
-	if (ba->err)
+	if (dsp->dsa_err)
 		return (EINTR);
 	return (0);
 }
@@ -314,7 +304,7 @@
 backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
-	struct backuparg *ba = arg;
+	dmu_sendarg_t *dsp = arg;
 	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
 	int err = 0;
 
@@ -327,10 +317,10 @@
 	} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
 		uint64_t span = BP_SPAN(dnp, zb->zb_level);
 		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
-		err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
+		err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
 	} else if (bp == NULL) {
 		uint64_t span = BP_SPAN(dnp, zb->zb_level);
-		err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
+		err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
 	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
 		return (0);
 	} else if (type == DMU_OT_DNODE) {
@@ -349,7 +339,7 @@
 		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
 			uint64_t dnobj = (zb->zb_blkid <<
 			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
-			err = dump_dnode(ba, dnobj, blk+i);
+			err = dump_dnode(dsp, dnobj, blk+i);
 			if (err)
 				break;
 		}
@@ -364,7 +354,7 @@
 		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
 			return (EIO);
 
-		err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data);
+		err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
 		(void) arc_buf_remove_ref(abuf, &abuf);
 	} else { /* it's a level-0 block of a regular object */
 		uint32_t aflags = ARC_WAIT;
@@ -388,7 +378,7 @@
 			}
 		}
 
-		err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
+		err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
 		    blksz, bp, abuf->b_data);
 		(void) arc_buf_remove_ref(abuf, &abuf);
 	}
@@ -398,13 +388,13 @@
 }
 
 int
-dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
-    vnode_t *vp, offset_t *off)
+dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+    int outfd, vnode_t *vp, offset_t *off)
 {
 	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
 	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
 	dmu_replay_record_t *drr;
-	struct backuparg ba;
+	dmu_sendarg_t *dsp;
 	int err;
 	uint64_t fromtxg = 0;
 
@@ -445,8 +435,10 @@
 #ifdef _KERNEL
 	if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
 		uint64_t version;
-		if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0)
+		if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
+			kmem_free(drr, sizeof (dmu_replay_record_t));
 			return (EINVAL);
+		}
 		if (version == ZPL_VERSION_SA) {
 			DMU_SET_FEATUREFLAGS(
 			    drr->drr_u.drr_begin.drr_versioninfo,
@@ -473,46 +465,59 @@
 	if (fromorigin)
 		dsl_dataset_rele(fromds, FTAG);
 
-	ba.drr = drr;
-	ba.vp = vp;
-	ba.os = tosnap;
-	ba.off = off;
-	ba.toguid = ds->ds_phys->ds_guid;
-	ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);
-	ba.pending_op = PENDING_NONE;
+	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 
-	if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
-		kmem_free(drr, sizeof (dmu_replay_record_t));
-		return (ba.err);
+	dsp->dsa_drr = drr;
+	dsp->dsa_vp = vp;
+	dsp->dsa_outfd = outfd;
+	dsp->dsa_proc = curproc;
+	dsp->dsa_os = tosnap;
+	dsp->dsa_off = off;
+	dsp->dsa_toguid = ds->ds_phys->ds_guid;
+	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
+	dsp->dsa_pending_op = PENDING_NONE;
+
+	mutex_enter(&ds->ds_sendstream_lock);
+	list_insert_head(&ds->ds_sendstreams, dsp);
+	mutex_exit(&ds->ds_sendstream_lock);
+
+	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+		err = dsp->dsa_err;
+		goto out;
 	}
 
 	err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
-	    backup_cb, &ba);
+	    backup_cb, dsp);
 
-	if (ba.pending_op != PENDING_NONE)
-		if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0)
+	if (dsp->dsa_pending_op != PENDING_NONE)
+		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
 			err = EINTR;
 
 	if (err) {
-		if (err == EINTR && ba.err)
-			err = ba.err;
-		kmem_free(drr, sizeof (dmu_replay_record_t));
-		return (err);
+		if (err == EINTR && dsp->dsa_err)
+			err = dsp->dsa_err;
+		goto out;
 	}
 
 	bzero(drr, sizeof (dmu_replay_record_t));
 	drr->drr_type = DRR_END;
-	drr->drr_u.drr_end.drr_checksum = ba.zc;
-	drr->drr_u.drr_end.drr_toguid = ba.toguid;
+	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
+	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
-		kmem_free(drr, sizeof (dmu_replay_record_t));
-		return (ba.err);
+	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+		err = dsp->dsa_err;
+		goto out;
 	}
 
-	kmem_free(drr, sizeof (dmu_replay_record_t));
+out:
+	mutex_enter(&ds->ds_sendstream_lock);
+	list_remove(&ds->ds_sendstreams, dsp);
+	mutex_exit(&ds->ds_sendstream_lock);
 
-	return (0);
+	kmem_free(drr, sizeof (dmu_replay_record_t));
+	kmem_free(dsp, sizeof (dmu_sendarg_t));
+
+	return (err);
 }
 
 int
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed May 09 15:05:14 2012 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/dmu_objset.h>
@@ -29,6 +30,7 @@
 #include <sys/dsl_prop.h>
 #include <sys/dsl_synctask.h>
 #include <sys/dmu_traverse.h>
+#include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/arc.h>
 #include <sys/zio.h>
@@ -394,6 +396,8 @@
 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
 		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
+		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
+
 		rw_init(&ds->ds_rwlock, 0, 0, 0);
 		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
 
@@ -401,6 +405,9 @@
 		dsl_deadlist_open(&ds->ds_deadlist,
 		    mos, ds->ds_phys->ds_deadlist_obj);
 
+		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
+		    offsetof(dmu_sendarg_t, dsa_link));
+
 		if (err == 0) {
 			err = dsl_dir_open_obj(dp,
 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Wed May 09 15:05:14 2012 -0700
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -704,8 +705,8 @@
 void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
     dmu_traverse_cb_t cb, void *arg);
 
-int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
-    struct vnode *vp, offset_t *off);
+int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+    int outfd, struct vnode *vp, offset_t *off);
 int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign,
     uint64_t *sizep);
 
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Wed May 09 15:05:14 2012 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #ifndef _SYS_DMU_IMPL_H
@@ -30,6 +31,7 @@
 #include <sys/zio.h>
 #include <sys/dnode.h>
 #include <sys/zfs_context.h>
+#include <sys/zfs_ioctl.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -264,6 +266,32 @@
 	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
 #define	XUIOSTAT_BUMP(stat)	XUIOSTAT_INCR(stat, 1)
 
+/*
+ * The list of data whose inclusion in a send stream can be pending from
+ * one call to backup_cb to another.  Multiple calls to dump_free() and
+ * dump_freeobjects() can be aggregated into a single DRR_FREE or
+ * DRR_FREEOBJECTS replay record.
+ */
+typedef enum {
+	PENDING_NONE,
+	PENDING_FREE,
+	PENDING_FREEOBJECTS
+} dmu_pendop_t;
+
+typedef struct dmu_sendarg {
+	list_node_t dsa_link;
+	dmu_replay_record_t *dsa_drr;
+	vnode_t *dsa_vp;
+	int dsa_outfd;
+	struct proc *dsa_proc;
+	offset_t *dsa_off;
+	objset_t *dsa_os;
+	zio_cksum_t dsa_zc;
+	uint64_t dsa_toguid;
+	int dsa_err;
+	dmu_pendop_t dsa_pending_op;
+} dmu_sendarg_t;
+
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed May 09 15:05:14 2012 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DATASET_H
@@ -150,6 +151,9 @@
 	uint64_t ds_reserved;	/* cached refreservation */
 	uint64_t ds_quota;	/* cached refquota */
 
+	kmutex_t ds_sendstream_lock;
+	list_t ds_sendstreams;
+
 	/* Protected by ds_lock; keep at end of struct for better locality */
 	char ds_snapname[MAXNAMELEN];
 } dsl_dataset_t;
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Wed May 09 15:05:14 2012 -0700
@@ -23,6 +23,7 @@
  * Portions Copyright 2011 Martin Matuska
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -51,6 +52,7 @@
 #include <sys/dsl_prop.h>
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_objset.h>
+#include <sys/dmu_impl.h>
 #include <sys/ddi.h>
 #include <sys/sunddi.h>
 #include <sys/sunldi.h>
@@ -3885,8 +3887,8 @@
 		}
 
 		off = fp->f_offset;
-		error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj,
-		    fp->f_vnode, &off);
+		error = dmu_send(tosnap, fromsnap, zc->zc_obj,
+		    zc->zc_cookie, fp->f_vnode, &off);
 
 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
 			fp->f_offset = off;
@@ -3898,6 +3900,49 @@
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name	name of snapshot on which to report progress
+ * zc_cookie	file descriptor of send stream
+ *
+ * outputs:
+ * zc_cookie	number of bytes written in send stream thus far
+ */
+static int
+zfs_ioc_send_progress(zfs_cmd_t *zc)
+{
+	dsl_dataset_t *ds;
+	dmu_sendarg_t *dsp = NULL;
+	int error;
+
+	if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
+		return (error);
+
+	mutex_enter(&ds->ds_sendstream_lock);
+
+	/*
+	 * Iterate over all the send streams currently active on this dataset.
+	 * If there's one which matches the specified file descriptor _and_ the
+	 * stream was started by the current process, return the progress of
+	 * that stream.
+	 */
+	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
+	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
+		if (dsp->dsa_outfd == zc->zc_cookie &&
+		    dsp->dsa_proc == curproc)
+			break;
+	}
+
+	if (dsp != NULL)
+		zc->zc_cookie = *(dsp->dsa_off);
+	else
+		error = ENOENT;
+
+	mutex_exit(&ds->ds_sendstream_lock);
+	dsl_dataset_rele(ds, FTAG);
+	return (error);
+}
+
 static int
 zfs_ioc_inject_fault(zfs_cmd_t *zc)
 {
@@ -4895,6 +4940,8 @@
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
 	{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
 	    POOL_CHECK_SUSPENDED },
+	{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+	    POOL_CHECK_NONE }
 };
 
 int
--- a/usr/src/uts/common/sys/fs/zfs.h	Tue May 08 05:52:36 2012 -0500
+++ b/usr/src/uts/common/sys/fs/zfs.h	Wed May 09 15:05:14 2012 -0700
@@ -23,6 +23,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -791,7 +792,8 @@
 	ZFS_IOC_SPACE_SNAPS,
 	ZFS_IOC_DESTROY_SNAPS_NVL,
 	ZFS_IOC_POOL_REGUID,
-	ZFS_IOC_POOL_REOPEN
+	ZFS_IOC_POOL_REOPEN,
+	ZFS_IOC_SEND_PROGRESS
 } zfs_ioc_t;
 
 /*