1144 blkdev should support dump(9e)
author Garrett D'Amore <garrett@nexenta.com>
Mon, 11 Jul 2011 07:38:13 -0700
changeset 13397 edfb98e65b97
parent 13396 c1f01cd09a07
child 13398 fa0b6e3a91f5
1144 blkdev should support dump(9e) Reviewed by: Gordon Ross <[email protected]> Reviewed by: Alexey Zaytsev <[email protected]> Approved by: Eric Schrock <[email protected]>
usr/src/uts/common/io/blkdev/blkdev.c
usr/src/uts/common/io/sdcard/impl/sda_mem.c
usr/src/uts/common/io/sdcard/impl/sda_slot.c
usr/src/uts/common/sys/blkdev.h
--- a/usr/src/uts/common/io/blkdev/blkdev.c	Thu Jul 07 13:45:10 2011 -0400
+++ b/usr/src/uts/common/io/blkdev/blkdev.c	Mon Jul 11 07:38:13 2011 -0700
@@ -22,6 +22,10 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+
 #include <sys/types.h>
 #include <sys/ksynch.h>
 #include <sys/kmem.h>
@@ -122,6 +126,7 @@
 #define	i_kaddr		i_public.x_kaddr
 #define	i_nblks		i_public.x_nblks
 #define	i_blkno		i_public.x_blkno
+#define	i_flags		i_public.x_flags
 
 
 /*
@@ -136,6 +141,7 @@
 static int bd_close(dev_t, int, int, cred_t *);
 static int bd_strategy(struct buf *);
 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static int bd_dump(dev_t, caddr_t, daddr_t, int);
 static int bd_read(dev_t, struct uio *, cred_t *);
 static int bd_write(dev_t, struct uio *, cred_t *);
 static int bd_aread(dev_t, struct aio_req *, cred_t *);
@@ -166,7 +172,7 @@
 	bd_close, 		/* close */
 	bd_strategy, 		/* strategy */
 	nodev, 			/* print */
-	nodev,			/* dump */
+	bd_dump,		/* dump */
 	bd_read, 		/* read */
 	bd_write, 		/* write */
 	bd_ioctl, 		/* ioctl */
@@ -399,7 +405,7 @@
 	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
 	    bd->d_removable, bd->d_hotpluggable,
 	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
-	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, bd);
+	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
 	if (rv != 0) {
 		cmlb_free_handle(&bd->d_cmlbh);
 		kmem_cache_destroy(bd->d_cache);
@@ -473,7 +479,7 @@
 	} else {
 		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 	}
-	cmlb_detach(bd->d_cmlbh, bd);
+	cmlb_detach(bd->d_cmlbh, 0);
 	cmlb_free_handle(&bd->d_cmlbh);
 	if (bd->d_devid)
 		ddi_devid_free(bd->d_devid);
@@ -704,7 +710,7 @@
 
 	bd_update_state(bd);
 
-	if (cmlb_validate(bd->d_cmlbh, 0, bd) != 0) {
+	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
 
 		/* non-blocking opens are allowed to succeed */
 		if (!ndelay) {
@@ -712,7 +718,7 @@
 			goto done;
 		}
 	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
-	    NULL, NULL, bd) == 0) {
+	    NULL, NULL, 0) == 0) {
 
 		/*
 		 * We read the partinfo, verify valid ranges.  If the
@@ -820,7 +826,7 @@
 	mutex_exit(&bd->d_ocmutex);
 
 	if (last) {
-		cmlb_invalidate(bd->d_cmlbh, bd);
+		cmlb_invalidate(bd->d_cmlbh, 0);
 	}
 	rw_exit(&bd_lock);
 
@@ -828,6 +834,75 @@
 }
 
 static int
+bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
+{
+	minor_t		inst;
+	minor_t		part;
+	diskaddr_t	pstart;		/* partition start, added to blkno */
+	diskaddr_t	psize;		/* partition size; bounds blkno+nblk */
+	bd_t		*bd;
+	bd_xfer_impl_t	*xi;
+	buf_t		*bp;
+	int		rv;
+
+	rw_enter(&bd_lock, RW_READER);
+
+	part = BDPART(dev);
+	inst = BDINST(dev);
+
+	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
+		rw_exit(&bd_lock);
+		return (ENXIO);
+	}
+	/*
+	 * Look up the partition; cmlb should normally have it cached.  The
+	 * non-NULL tg_cookie presumably forces polled, no-sleep label I/O.
+	 */
+	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
+	    (void *)1)) {
+		rw_exit(&bd_lock);
+		return (ENXIO);
+	}
+
+	if ((blkno + nblk) > psize) {
+		rw_exit(&bd_lock);
+		return (EINVAL);
+	}
+	bp = getrbuf(KM_NOSLEEP);
+	if (bp == NULL) {
+		rw_exit(&bd_lock);
+		return (ENOMEM);
+	}
+
+	bp->b_bcount = nblk << bd->d_blkshift;	/* blocks -> bytes */
+	bp->b_resid = bp->b_bcount;
+	bp->b_lblkno = blkno;
+	bp->b_un.b_addr = caddr;
+
+	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
+	if (xi == NULL) {
+		rw_exit(&bd_lock);
+		freerbuf(bp);
+		return (ENOMEM);
+	}
+	xi->i_blkno = blkno + pstart;	/* offset by partition start */
+	xi->i_flags = BD_XFER_POLL;	/* no interrupts during dump */
+	bd_submit(bd, xi);
+	rw_exit(&bd_lock);
+
+	/*
+	 * Generally, we should have run this entirely synchronously
+	 * at this point and the biowait call should be a no-op.  If
+	 * it didn't happen this way, it's a bug in the underlying
+	 * driver not honoring BD_XFER_POLL.
+	 */
+	(void) biowait(bp);
+	rv = geterror(bp);
+	freerbuf(bp);
+	return (rv);
+}
+
+static int
 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
 {
 	_NOTE(ARGUNUSED(credp));
@@ -882,7 +957,7 @@
 	}
 
 	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
-	    NULL, NULL, bd)) {
+	    NULL, NULL, 0)) {
 		bioerror(bp, ENXIO);
 		biodone(bp);
 		return (0);
@@ -942,7 +1017,7 @@
 		return (ENXIO);
 	}
 
-	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, bd);
+	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
 	if (rv != ENOTTY)
 		return (rv);
 
@@ -1048,7 +1123,7 @@
 		    name, valuep, lengthp));
 
 	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
-	    valuep, lengthp, BDPART(dev), bd));
+	    valuep, lengthp, BDPART(dev), 0));
 }
 
 
@@ -1061,17 +1136,24 @@
 	bd_xfer_impl_t	*xi;
 	int		rv;
 	int		(*func)(void *, bd_xfer_t *);
-
-	_NOTE(ARGUNUSED(dip));
+	int		kmflag;
 
+	/*
+	 * If we are running in polled mode (such as during dump(9e)
+	 * execution), then we cannot sleep for kernel allocations.
+	 */
+	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
 
-	bd = tg_cookie;
+	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
+
 	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
 		/* We can only transfer whole blocks at a time! */
 		return (EINVAL);
 	}
 
-	bp = getrbuf(KM_SLEEP);
+	if ((bp = getrbuf(kmflag)) == NULL) {
+		return (ENOMEM);
+	}
 
 	switch (cmd) {
 	case TG_READ:
@@ -1089,13 +1171,13 @@
 
 	bp->b_un.b_addr = bufaddr;
 	bp->b_bcount = length;
-	xi = bd_xfer_alloc(bd, bp, func, KM_SLEEP);
+	xi = bd_xfer_alloc(bd, bp, func, kmflag);
 	if (xi == NULL) {
 		rv = geterror(bp);
 		freerbuf(bp);
 		return (rv);
 	}
-
+	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
 	xi->i_blkno = start;
 	bd_submit(bd, xi);
 	(void) biowait(bp);
@@ -1110,8 +1192,8 @@
 {
 	bd_t		*bd;
 
-	_NOTE(ARGUNUSED(dip));
-	bd = tg_cookie;
+	_NOTE(ARGUNUSED(tg_cookie));
+	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
 
 	switch (cmd) {
 	case TG_GETPHYGEOM:
@@ -1157,7 +1239,7 @@
 	struct buf	*bp;
 	int		rv;
 
-	ASSERT(mutex_owned(&bd->d_iomutex));
+	mutex_enter(&bd->d_iomutex);
 
 	while ((bd->d_qactive < bd->d_qsize) &&
 	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
@@ -1165,21 +1247,31 @@
 		kstat_waitq_to_runq(bd->d_kiop);
 		list_insert_tail(&bd->d_runq, xi);
 
-		/* Submit the job to driver */
+		/*
+		 * Submit the job to the driver.  We drop the I/O mutex
+		 * so that we can deal with the case where the driver
+		 * completion routine calls back into us synchronously.
+		 */
+
+		mutex_exit(&bd->d_iomutex);
+
 		rv = xi->i_func(bd->d_private, &xi->i_public);
 		if (rv != 0) {
-			bd->d_qactive--;
-			kstat_runq_exit(bd->d_kiop);
-			list_remove(&bd->d_runq, xi);
-
-			mutex_exit(&bd->d_iomutex);
 			bp = xi->i_bp;
 			bd_xfer_free(xi);
 			bioerror(bp, rv);
 			biodone(bp);
+
+			mutex_enter(&bd->d_iomutex);
+			bd->d_qactive--;
+			kstat_runq_exit(bd->d_kiop);
+			list_remove(&bd->d_runq, xi);
+		} else {
 			mutex_enter(&bd->d_iomutex);
 		}
 	}
+
+	mutex_exit(&bd->d_iomutex);
 }
 
 static void
@@ -1188,8 +1280,9 @@
 	mutex_enter(&bd->d_iomutex);
 	list_insert_tail(&bd->d_waitq, xi);
 	kstat_waitq_enter(bd->d_kiop);
+	mutex_exit(&bd->d_iomutex);
+
 	bd_sched(bd);
-	mutex_exit(&bd->d_iomutex);
 }
 
 static void
@@ -1198,10 +1291,12 @@
 	bd_t	*bd = xi->i_bd;
 	buf_t	*bp = xi->i_bp;
 
-	ASSERT(mutex_owned(&bd->d_iomutex));
-
+	mutex_enter(&bd->d_iomutex);
 	bd->d_qactive--;
 	kstat_runq_exit(bd->d_kiop);
+	list_remove(&bd->d_runq, xi);
+	mutex_exit(&bd->d_iomutex);
+
 	if (err == 0) {
 		if (bp->b_flags & B_READ) {
 			bd->d_kiop->reads++;
@@ -1211,7 +1306,6 @@
 			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
 		}
 	}
-	list_remove(&bd->d_runq, xi);
 	bd_sched(bd);
 }
 
@@ -1274,9 +1368,9 @@
 
 	if (docmlb) {
 		if (state == DKIO_INSERTED) {
-			(void) cmlb_validate(bd->d_cmlbh, 0, bd);
+			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
 		} else {
-			cmlb_invalidate(bd->d_cmlbh, bd);
+			cmlb_invalidate(bd->d_cmlbh, 0);
 		}
 	}
 }
@@ -1505,10 +1599,8 @@
 	bd_t		*bd = xi->i_bd;
 	size_t		len;
 
-	mutex_enter(&bd->d_iomutex);
 	if (err != 0) {
 		bd_runq_exit(xi, err);
-		mutex_exit(&bd->d_iomutex);
 
 		bp->b_resid += xi->i_resid;
 		bd_xfer_free(xi);
@@ -1523,7 +1615,6 @@
 	if (xi->i_resid == 0) {
 		/* Job completed succcessfully! */
 		bd_runq_exit(xi, 0);
-		mutex_exit(&bd->d_iomutex);
 
 		bd_xfer_free(xi);
 		biodone(bp);
@@ -1547,7 +1638,6 @@
 	if ((rv != DDI_SUCCESS) ||
 	    (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
 		bd_runq_exit(xi, EFAULT);
-		mutex_exit(&bd->d_iomutex);
 
 		bp->b_resid += xi->i_resid;
 		bd_xfer_free(xi);
@@ -1562,16 +1652,12 @@
 	rv = xi->i_func(bd->d_private, &xi->i_public);
 	if (rv != 0) {
 		bd_runq_exit(xi, rv);
-		mutex_exit(&bd->d_iomutex);
 
 		bp->b_resid += xi->i_resid;
 		bd_xfer_free(xi);
 		bioerror(bp, rv);
 		biodone(bp);
-		return;
 	}
-
-	mutex_exit(&bd->d_iomutex);
 }
 
 void
--- a/usr/src/uts/common/io/sdcard/impl/sda_mem.c	Thu Jul 07 13:45:10 2011 -0400
+++ b/usr/src/uts/common/io/sdcard/impl/sda_mem.c	Mon Jul 11 07:38:13 2011 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -141,6 +142,9 @@
 	uint8_t		cmd;
 	uint16_t	flags;
 
+	if (xfer->x_flags & BD_XFER_POLL) {
+		return (EIO);
+	}
 	if (xfer->x_nblks > 1) {
 		cmd = CMD_READ_MULTI;
 		flags = SDA_CMDF_DAT | SDA_CMDF_MEM | SDA_CMDF_READ |
@@ -160,6 +164,9 @@
 	uint8_t		cmd;
 	uint16_t	flags;
 
+	if (xfer->x_flags & BD_XFER_POLL) {
+		return (EIO);
+	}
 	if ((slot->s_flags & SLOTF_WRITABLE) == 0) {
 		return (EROFS);
 	}
--- a/usr/src/uts/common/io/sdcard/impl/sda_slot.c	Thu Jul 07 13:45:10 2011 -0400
+++ b/usr/src/uts/common/io/sdcard/impl/sda_slot.c	Mon Jul 11 07:38:13 2011 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -484,7 +484,6 @@
 	NULL,			/* sync_cache */
 	sda_mem_bd_read,
 	sda_mem_bd_write,
-	NULL			/* dump */
 };
 
 void
--- a/usr/src/uts/common/sys/blkdev.h	Thu Jul 07 13:45:10 2011 -0400
+++ b/usr/src/uts/common/sys/blkdev.h	Mon Jul 11 07:38:13 2011 -0700
@@ -19,6 +19,7 @@
  * CDDL HEADER END
  */
 /*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
@@ -40,9 +41,7 @@
  * but do not need all the capabilities of SCSA.  So we make quite a few
  * simplifications:
  *
- * 1) Device block size is fixed at 512 bytes.  (Devices with larger
- *    block sizes can still operate, but will need to support some
- *    form of read-modify-write, and will take a performance penalty.)
+ * 1) Device block size is a multiple of 512 bytes.
  *
  * 2) Non-rotating media.  We assume a simple linear layout.
  *
@@ -89,6 +88,7 @@
 	ddi_dma_cookie_t	x_dmac;
 	unsigned		x_ndmac;
 	caddr_t			x_kaddr;
+	unsigned		x_flags;
 };
 
 #define	BD_XFER_POLL		(1U << 0)	/* no interrupts (dump) */
@@ -130,7 +130,6 @@
 	int	(*o_sync_cache)(void *, bd_xfer_t *);
 	int	(*o_read)(void *, bd_xfer_t *);
 	int	(*o_write)(void *, bd_xfer_t *);
-	int	(*o_dump)(void *, bd_xfer_t *);
 };
 
 #define	BD_OPS_VERSION_0		0