6909809 COMSTAR should avoid extra data copy to zvol-based backing store
6931076 COMSTAR qlt driver should be able to dynamically map passed-in buffers
6933737 When QLT is loaded, fcinfo reports a supported speed of 1Gb for 8Gb HBAs
6912734 qlt: qlt emits spurious warnings during init on debug kernels
--- a/usr/src/uts/common/Makefile.files Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/Makefile.files Wed May 05 10:23:23 2010 -0700
@@ -969,7 +969,7 @@
STMF_OBJS += lun_map.o stmf.o
-STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o
+STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o
SYSMSG_OBJS += sysmsg.o
--- a/usr/src/uts/common/fs/zfs/sys/zvol.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zvol.h Wed May 05 10:23:23 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZVOL_H
@@ -59,6 +58,15 @@
extern int zvol_busy(void);
extern void zvol_init(void);
extern void zvol_fini(void);
+
+extern int zvol_get_volume_params(minor_t minor, uint64_t *blksize,
+ uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
+ void **rl_hdl, void **bonus_hdl);
+extern uint64_t zvol_get_volume_size(void *minor_hdl);
+extern int zvol_get_volume_wce(void *minor_hdl);
+extern void zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off,
+ ssize_t resid, boolean_t sync);
+
#endif
#ifdef __cplusplus
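The four entry points above are the entire surface a direct-access consumer needs from zvol. A minimal sketch of the intended usage, assuming the zvol is already held open (so the minor is valid and the handles stay stable); the structure and function names below are illustrative only, not part of this change:

    typedef struct zd_state {
        uint64_t zd_blksize, zd_max_xfer;
        void *zd_minor_hdl, *zd_objset_hdl, *zd_zil_hdl;
        void *zd_rl_hdl, *zd_bonus_hdl;
    } zd_state_t;

    static int
    zd_attach(minor_t minor, zd_state_t *zd)
    {
        int err;

        /* invariant parameters and opaque handles, fetched once per open */
        err = zvol_get_volume_params(minor, &zd->zd_blksize,
            &zd->zd_max_xfer, &zd->zd_minor_hdl, &zd->zd_objset_hdl,
            &zd->zd_zil_hdl, &zd->zd_rl_hdl, &zd->zd_bonus_hdl);
        if (err != 0)
            return (err);    /* ENXIO: bad minor or dumpified zvol */

        /* volsize and WCE can change while open; fetch per operation */
        if (zvol_get_volume_size(zd->zd_minor_hdl) == 0)
            return (ENXIO);
        return (0);
    }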
--- a/usr/src/uts/common/fs/zfs/zvol.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zvol.c Wed May 05 10:23:23 2010 -0700
@@ -1458,6 +1458,79 @@
}
/*
+ * BEGIN entry points to allow external callers access to the volume.
+ */
+/*
+ * Return the volume parameters needed for access from an external caller.
+ * These values are invariant as long as the volume is held open.
+ */
+int
+zvol_get_volume_params(minor_t minor, uint64_t *blksize,
+ uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
+ void **rl_hdl, void **bonus_hdl)
+{
+ zvol_state_t *zv;
+
+ if (minor == 0)
+ return (ENXIO);
+ if ((zv = ddi_get_soft_state(zvol_state, minor)) == NULL)
+ return (ENXIO);
+ if (zv->zv_flags & ZVOL_DUMPIFIED)
+ return (ENXIO);
+
+ ASSERT(blksize && max_xfer_len && minor_hdl &&
+ objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
+
+ *blksize = zv->zv_volblocksize;
+ *max_xfer_len = (uint64_t)zvol_maxphys;
+ *minor_hdl = zv;
+ *objset_hdl = zv->zv_objset;
+ *zil_hdl = zv->zv_zilog;
+ *rl_hdl = &zv->zv_znode;
+ *bonus_hdl = zv->zv_dbuf;
+ return (0);
+}
+
+/*
+ * Return the current volume size to an external caller.
+ * The size can change while the volume is open.
+ */
+uint64_t
+zvol_get_volume_size(void *minor_hdl)
+{
+ zvol_state_t *zv = minor_hdl;
+
+ return (zv->zv_volsize);
+}
+
+/*
+ * Return the current WCE setting to an external caller.
+ * The WCE setting can change while the volume is open.
+ */
+int
+zvol_get_volume_wce(void *minor_hdl)
+{
+ zvol_state_t *zv = minor_hdl;
+
+ return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
+}
+
+/*
+ * Entry point for external callers to zvol_log_write
+ */
+void
+zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
+ boolean_t sync)
+{
+ zvol_state_t *zv = minor_hdl;
+
+ zvol_log_write(zv, tx, off, resid, sync);
+}
+/*
+ * END entry points to allow external callers access to the volume.
+ */
+
+/*
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
*/
/*ARGSUSED*/
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c Wed May 05 10:23:23 2010 -0700
@@ -1405,6 +1405,7 @@
lu->lu_send_status_done = sbd_send_status_done;
lu->lu_task_free = sbd_task_free;
lu->lu_abort = sbd_abort;
+ lu->lu_dbuf_free = sbd_dbuf_free;
lu->lu_ctl = sbd_ctl;
lu->lu_info = sbd_info;
sl->sl_state = STMF_STATE_OFFLINE;
@@ -1432,6 +1433,8 @@
uint64_t supported_size;
vattr_t vattr;
enum vtype vt;
+ struct dk_cinfo dki;
+ int unused;
mutex_enter(&sl->sl_lock);
if (vp_valid) {
@@ -1483,6 +1486,7 @@
}
/* sl_data_readable size includes any metadata. */
sl->sl_data_readable_size = vattr.va_size;
+
if (VOP_PATHCONF(sl->sl_data_vp, _PC_FILESIZEBITS, &nbits,
CRED(), NULL) != 0) {
nbits = 0;
@@ -1532,6 +1536,21 @@
ret = EINVAL;
goto odf_close_data_and_exit;
}
+ /*
+ * Get the minor device for direct zvol access
+ */
+ if (sl->sl_flags & SL_ZFS_META) {
+ if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
+ FKIOCTL, kcred, &unused, NULL)) != 0) {
+ cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
+ /* zvol reserves 0, so this would fail later */
+ sl->sl_zvol_minor = 0;
+ } else {
+ sl->sl_zvol_minor = dki.dki_unit;
+ if (sbd_zvol_get_volume_params(sl) == 0)
+ sl->sl_flags |= SL_CALL_ZVOL;
+ }
+ }
sl->sl_flags |= SL_MEDIA_LOADED;
mutex_exit(&sl->sl_lock);
return (0);
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h Wed May 05 10:23:23 2010 -0700
@@ -254,6 +254,40 @@
*/
#define SBD_IT_HAS_SCSI2_RESERVATION 0x0001
+/*
+ * dbuf private data needed for direct zvol data transfers
+ *
+ * To further isolate the zvol knowledge, the object handles
+ * needed to call into zfs are declared void * here.
+ */
+
+typedef struct sbd_zvol_io {
+ uint64_t zvio_offset; /* offset into volume */
+ int zvio_flags; /* flags */
+ void *zvio_dbp; /* array of dmu buffers */
+ void *zvio_abp; /* array of arc buffers */
+ uio_t *zvio_uio; /* for copy operations */
+} sbd_zvol_io_t;
+
+#define ZVIO_DEFAULT 0
+#define ZVIO_COMMIT 1
+#define ZVIO_ABORT 2
+#define ZVIO_SYNC 4
+#define ZVIO_ASYNC 8
+
+/*
+ * zvol data path functions
+ */
+int sbd_zvol_get_volume_params(sbd_lu_t *sl);
+uint32_t sbd_zvol_numsegs(sbd_lu_t *sl, uint64_t off, uint32_t len);
+int sbd_zvol_alloc_read_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf);
+void sbd_zvol_rele_read_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf);
+int sbd_zvol_alloc_write_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf);
+void sbd_zvol_rele_write_bufs_abort(sbd_lu_t *sl, stmf_data_buf_t *dbuf);
+int sbd_zvol_rele_write_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf);
+int sbd_zvol_copy_read(sbd_lu_t *sl, uio_t *uio);
+int sbd_zvol_copy_write(sbd_lu_t *sl, uio_t *uio, int flags);
+
stmf_status_t sbd_task_alloc(struct scsi_task *task);
void sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf);
void sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf);
@@ -261,6 +295,7 @@
void sbd_task_free(struct scsi_task *task);
stmf_status_t sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg,
uint32_t flags);
+void sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf);
void sbd_ctl(struct stmf_lu *lu, int cmd, void *arg);
stmf_status_t sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg,
uint8_t *buf, uint32_t *bufsizep);
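Taken together, the prototypes above define a small contract between sbd_scsi.c and sbd_zvol.c. A collapsed sketch of the write-side sequencing, with error handling and STMF_BUSY retries elided and the asynchronous completion folded inline (the real logic is split between sbd_do_sgl_write_xfer and its completion handler; this helper is illustrative only):

    static int
    sbd_zvol_write_sketch(sbd_lu_t *sl, scsi_task_t *task,
        stmf_data_buf_t *dbuf)
    {
        sbd_zvol_io_t *zvio = dbuf->db_lu_private;

        /* loan arc buffers sized/aligned to the zvol blocks in range */
        if (sbd_zvol_alloc_write_bufs(sl, dbuf) != 0)
            return (EIO);
        if (stmf_xfer_data(task, dbuf, 0) != STMF_SUCCESS) {
            /* nothing queued; hand the loaned buffers back */
            sbd_zvol_rele_write_bufs_abort(sl, dbuf);
            return (EIO);
        }
        /* ... port DMAs initiator data into the loaned buffers ... */
        zvio->zvio_flags = ZVIO_COMMIT;   /* final buffer of the command */
        return (sbd_zvol_rele_write_bufs(sl, dbuf)); /* assign bufs, commit */
    }

Reads follow the same shape with sbd_zvol_alloc_read_bufs/sbd_zvol_rele_read_bufs, except the sglist is backed by held dmu buffers rather than loaned arc buffers, and sbd_zvol_numsegs sizes the sglist up front.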
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c Wed May 05 10:23:23 2010 -0700
@@ -92,8 +92,6 @@
stmf_data_buf_t *dbuf);
void sbd_handle_short_write_transfers(scsi_task_t *task,
stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
-static void sbd_handle_sync_cache(struct scsi_task *task,
- struct stmf_data_buf *initial_dbuf);
void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
uint32_t buflen);
void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
@@ -105,6 +103,8 @@
extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
+void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
+ int first_xfer);
/*
* IMPORTANT NOTE:
* =================
@@ -178,6 +178,269 @@
}
}
+/*
+ * sbd_zcopy: Bail-out switch for reduced copy path.
+ *
+ * 0 - read & write off
+ * 1 - read & write on
+ * 2 - only read on
+ * 4 - only write on
+ */
+int sbd_zcopy = 1; /* enable zcopy read & write path */
+uint32_t sbd_max_xfer_len = 0; /* Valid if non-zero */
+uint32_t sbd_1st_xfer_len = 0; /* Valid if non-zero */
+uint32_t sbd_copy_threshold = 0; /* Valid if non-zero */
+
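These four globals are debug/tuning knobs rather than compiled-in policy. Assuming the usual kernel-variable mechanisms and the module name stmf_sbd, they could be adjusted without a rebuild, e.g.:

    set stmf_sbd:sbd_zcopy = 2         (in /etc/system: read-side zcopy only)
    echo 'sbd_zcopy/W 0' | mdb -kw     (at runtime: disable zcopy entirely)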
+static void
+sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ sbd_zvol_io_t *zvio;
+ int ret, final_xfer;
+ uint64_t offset;
+ uint32_t xfer_len, max_len, first_len;
+ stmf_status_t xstat;
+ stmf_data_buf_t *dbuf;
+ uint_t nblks;
+ uint64_t blksize = sl->sl_blksize;
+ size_t db_private_sz;
+ hrtime_t xfer_start, xfer_elapsed;
+ uintptr_t pad;
+
+ ASSERT(rw_read_held(&sl->sl_access_state_lock));
+ ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
+
+ /*
+ * Calculate the limits on xfer_len to the minimum of:
+ * - task limit
+ * - lun limit
+ * - sbd global limit if set
+ * - first xfer limit if set
+ *
+ * First, protect against silly over-ride value
+ */
+ if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
+ cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
+ sbd_max_xfer_len);
+ sbd_max_xfer_len = 0;
+ }
+ if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
+ cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
+ sbd_1st_xfer_len);
+ sbd_1st_xfer_len = 0;
+ }
+
+ max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
+ if (sbd_max_xfer_len)
+ max_len = MIN(max_len, sbd_max_xfer_len);
+ /*
+ * Special case the first xfer if hints are set.
+ */
+ if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
+ /* global over-ride has precedence */
+ if (sbd_1st_xfer_len)
+ first_len = sbd_1st_xfer_len;
+ else
+ first_len = task->task_1st_xfer_len;
+ } else {
+ first_len = 0;
+ }
+
+ while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
+
+ xfer_len = MIN(max_len, scmd->len);
+ if (first_len) {
+ xfer_len = MIN(xfer_len, first_len);
+ first_len = 0;
+ }
+ if (scmd->len == xfer_len) {
+ final_xfer = 1;
+ } else {
+ /*
+ * Attempt to end xfer on a block boundary.
+ * The only way this does not happen is if the
+ * xfer_len is small enough to stay contained
+ * within the same block.
+ */
+ uint64_t xfer_offset, xfer_aligned_end;
+
+ final_xfer = 0;
+ xfer_offset = scmd->addr + scmd->current_ro;
+ xfer_aligned_end =
+ P2ALIGN(xfer_offset+xfer_len, blksize);
+ if (xfer_aligned_end > xfer_offset)
+ xfer_len = xfer_aligned_end - xfer_offset;
+ }
+ /*
+ * Allocate object to track the read and reserve
+ * enough space for scatter/gather list.
+ */
+ offset = scmd->addr + scmd->current_ro;
+ nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
+
+ db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
+ (nblks * sizeof (stmf_sglist_ent_t));
+ dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
+ AF_DONTZERO);
+ /*
+ * Setup the dbuf
+ *
+ * XXX Framework does not handle variable length sglists
+ * properly, so setup db_lu_private and db_port_private
+ * fields here. db_stmf_private is properly set for
+ * calls to stmf_free.
+ */
+ if (dbuf->db_port_private == NULL) {
+ /*
+ * XXX Framework assigns space to PP after db_sglist[0]
+ */
+ cmn_err(CE_PANIC, "db_port_private == NULL");
+ }
+ pad = (uintptr_t)&dbuf->db_sglist[nblks];
+ dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
+ dbuf->db_port_private = NULL;
+ dbuf->db_buf_size = xfer_len;
+ dbuf->db_data_size = xfer_len;
+ dbuf->db_relative_offset = scmd->current_ro;
+ dbuf->db_sglist_length = (uint16_t)nblks;
+ dbuf->db_xfer_status = 0;
+ dbuf->db_handle = 0;
+
+ dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
+ DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
+ if (final_xfer)
+ dbuf->db_flags |= DB_SEND_STATUS_GOOD;
+
+ zvio = dbuf->db_lu_private;
+ /* Need absolute offset for zvol access */
+ zvio->zvio_offset = offset;
+ zvio->zvio_flags = ZVIO_SYNC;
+
+ /*
+ * Accounting for start of read.
+ * Note there is no buffer address for the probe yet.
+ */
+ stmf_lu_xfer_start(task);
+ DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, xfer_len,
+ uint64_t, offset, scsi_task_t *, task);
+ xfer_start = gethrtime();
+
+ ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
+
+ xfer_elapsed = gethrtime() - xfer_start;
+
+ stmf_lu_xfer_done(task, B_TRUE /* read */, (uint64_t)xfer_len,
+ xfer_elapsed);
+ DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, xfer_len,
+ uint64_t, offset, int, ret, scsi_task_t *, task);
+
+ if (ret != 0) {
+ /*
+ * Read failure from the backend.
+ */
+ stmf_free(dbuf);
+ if (scmd->nbufs == 0) {
+ /* nothing queued, just finish */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ rw_exit(&sl->sl_access_state_lock);
+ } else {
+ /* process failure when other dbufs finish */
+ scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ }
+ return;
+ }
+
+
+ /*
+ * Allow PP to do setup
+ */
+ xstat = stmf_setup_dbuf(task, dbuf, 0);
+ if (xstat != STMF_SUCCESS) {
+ /*
+ * This could happen if the driver cannot get the
+ * DDI resources it needs for this request.
+ * If other dbufs are queued, try again when the next
+ * one completes, otherwise give up.
+ */
+ sbd_zvol_rele_read_bufs(sl, dbuf);
+ stmf_free(dbuf);
+ if (scmd->nbufs > 0) {
+ /* completion of previous dbuf will retry */
+ return;
+ }
+ /*
+ * Done with this command.
+ */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if (first_xfer)
+ stmf_scsilib_send_status(task, STATUS_QFULL, 0);
+ else
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ rw_exit(&sl->sl_access_state_lock);
+ return;
+ }
+ /*
+ * dbuf is now queued on task
+ */
+ scmd->nbufs++;
+
+ /* XXX leave this in for FW? */
+ DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
+ struct stmf_data_buf *, dbuf, uint64_t, offset,
+ uint32_t, xfer_len);
+ /*
+ * Do not pass STMF_IOF_LU_DONE so that the zvol
+ * state can be released in the completion callback.
+ */
+ xstat = stmf_xfer_data(task, dbuf, 0);
+ switch (xstat) {
+ case STMF_SUCCESS:
+ break;
+ case STMF_BUSY:
+ /*
+ * The dbuf is queued on the task, but unknown
+ * to the PP, thus no completion will occur.
+ */
+ sbd_zvol_rele_read_bufs(sl, dbuf);
+ stmf_teardown_dbuf(task, dbuf);
+ stmf_free(dbuf);
+ scmd->nbufs--;
+ if (scmd->nbufs > 0) {
+ /* completion of previous dbuf will retry */
+ return;
+ }
+ /*
+ * Done with this command.
+ */
+ rw_exit(&sl->sl_access_state_lock);
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if (first_xfer)
+ stmf_scsilib_send_status(task, STATUS_QFULL, 0);
+ else
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ return;
+ case STMF_ABORTED:
+ /*
+ * Completion from task_done will cleanup
+ */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ return;
+ }
+ /*
+ * Update the xfer progress.
+ */
+ ASSERT(scmd->len >= xfer_len);
+ scmd->len -= xfer_len;
+ scmd->current_ro += xfer_len;
+ }
+}
+
void
sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
struct stmf_data_buf *dbuf)
@@ -225,6 +488,325 @@
sbd_do_read_xfer(task, scmd, dbuf);
}
+/*
+ * This routine must release the DMU resources and free the dbuf
+ * in all cases. If this is the final dbuf of the task, then drop
+ * the reader lock on the LU state. If there are no errors and more
+ * work to do, then queue more xfer operations.
+ */
+void
+sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
+ struct stmf_data_buf *dbuf)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ stmf_status_t xfer_status;
+ uint32_t data_size;
+ int scmd_err;
+
+ ASSERT(dbuf->db_lu_private);
+ ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
+
+ scmd->nbufs--; /* account for this dbuf */
+ /*
+ * Release the DMU resources.
+ */
+ sbd_zvol_rele_read_bufs(sl, dbuf);
+ /*
+ * Release the dbuf after retrieving needed fields.
+ */
+ xfer_status = dbuf->db_xfer_status;
+ data_size = dbuf->db_data_size;
+ stmf_teardown_dbuf(task, dbuf);
+ stmf_free(dbuf);
+ /*
+ * Release the state lock if this is the last completion.
+ * If this is the last dbuf on task and all data has been
+ * transferred or an error encountered, then no more dbufs
+ * will be queued.
+ */
+ scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
+ (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
+ (xfer_status != STMF_SUCCESS));
+ if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+ /* all DMU state has been released */
+ rw_exit(&sl->sl_access_state_lock);
+ }
+
+ /*
+ * If there have been no errors, either complete the task
+ * or issue more data xfer operations.
+ */
+ if (!scmd_err) {
+ /*
+ * This chunk completed successfully
+ */
+ task->task_nbytes_transferred += data_size;
+ if (scmd->nbufs == 0 && scmd->len == 0) {
+ /*
+ * This command completed successfully
+ *
+ * Status was sent along with data, so no status
+ * completion will occur. Tell stmf we are done.
+ */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ stmf_task_lu_done(task);
+ return;
+ }
+ /*
+ * Start more xfers
+ */
+ sbd_do_sgl_read_xfer(task, scmd, 0);
+ return;
+ }
+ /*
+ * Sort out the failure
+ */
+ if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
+ /*
+ * If a previous error occurred, leave the command active
+ * and wait for the last completion to send the status check.
+ */
+ if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ if (scmd->nbufs == 0) {
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ }
+ return;
+ }
+ /*
+ * Must have been a failure on current dbuf
+ */
+ ASSERT(xfer_status != STMF_SUCCESS);
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
+ }
+}
+
+void
+sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
+ struct stmf_data_buf *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ int ret;
+ int scmd_err, scmd_xfer_done;
+ stmf_status_t xfer_status = dbuf->db_xfer_status;
+ uint32_t data_size = dbuf->db_data_size;
+ hrtime_t xfer_start;
+
+ ASSERT(zvio);
+
+ /*
+ * Allow PP to free up resources before releasing the write bufs
+ * as writing to the backend could take some time.
+ */
+ stmf_teardown_dbuf(task, dbuf);
+
+ scmd->nbufs--; /* account for this dbuf */
+ /*
+ * All data was queued and this is the last completion,
+ * but there could still be an error.
+ */
+ scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
+ scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
+ (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
+ (xfer_status != STMF_SUCCESS));
+
+ /* start the accounting clock */
+ stmf_lu_xfer_start(task);
+ DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, data_size,
+ uint64_t, zvio->zvio_offset, scsi_task_t *, task);
+ xfer_start = gethrtime();
+
+ if (scmd_err) {
+ /* just return the write buffers */
+ sbd_zvol_rele_write_bufs_abort(sl, dbuf);
+ ret = 0;
+ } else {
+ if (scmd_xfer_done)
+ zvio->zvio_flags = ZVIO_COMMIT;
+ else
+ zvio->zvio_flags = 0;
+ /* write the data */
+ ret = sbd_zvol_rele_write_bufs(sl, dbuf);
+ }
+
+ /* finalize accounting */
+ stmf_lu_xfer_done(task, B_FALSE /* not read */, data_size,
+ (gethrtime() - xfer_start));
+ DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, data_size,
+ uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task);
+
+ if (ret != 0) {
+ /* update the error flag */
+ scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ scmd_err = 1;
+ }
+
+ /* Release the dbuf */
+ stmf_free(dbuf);
+
+ /*
+ * Release the state lock if this is the last completion.
+ * If this is the last dbuf on task and all data has been
+ * transferred or an error encountered, then no more dbufs
+ * will be queued.
+ */
+ if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+ /* all DMU state has been released */
+ rw_exit(&sl->sl_access_state_lock);
+ }
+ /*
+ * If there have been no errors, either complete the task
+ * or issue more data xfer operations.
+ */
+ if (!scmd_err) {
+ /* This chunk completed successfully */
+ task->task_nbytes_transferred += data_size;
+ if (scmd_xfer_done) {
+ /* This command completed successfully */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
+ (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ } else {
+ stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+ }
+ return;
+ }
+ /*
+ * Start more xfers
+ */
+ sbd_do_sgl_write_xfer(task, scmd, 0);
+ return;
+ }
+ /*
+ * Sort out the failure
+ */
+ if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
+ if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ if (scmd->nbufs == 0) {
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ }
+ /*
+ * Leave the command active until last dbuf completes.
+ */
+ return;
+ }
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ ASSERT(xfer_status != STMF_SUCCESS);
+ stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
+ }
+}
+
+/*
+ * Handle a copy operation using the zvol interface.
+ *
+ * Similar to the sbd_data_read/write path, except it goes directly through
+ * the zvol interfaces. It can pass a port provider sglist in the
+ * form of a uio, which would otherwise be lost through the vn_rdwr path.
+ *
+ * Returns:
+ * STMF_SUCCESS - request handled
+ * STMF_FAILURE - request not handled, caller must deal with error
+ */
+static stmf_status_t
+sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
+ int cmd, int commit)
+{
+ sbd_lu_t *sl = task->task_lu->lu_provider_private;
+ struct uio uio;
+ struct iovec *iov, *tiov, iov1[8];
+ uint32_t len, resid;
+ int ret, i, iovcnt, flags;
+ hrtime_t xfer_start;
+ boolean_t is_read;
+
+ ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
+
+ is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
+ iovcnt = dbuf->db_sglist_length;
+ /* use the stack for small iovecs */
+ if (iovcnt > 8) {
+ iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
+ } else {
+ iov = &iov1[0];
+ }
+
+ /* Convert dbuf sglist to iovec format */
+ len = dbuf->db_data_size;
+ resid = len;
+ tiov = iov;
+ for (i = 0; i < iovcnt; i++) {
+ tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
+ tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
+ resid -= tiov->iov_len;
+ tiov++;
+ }
+ if (resid != 0) {
+ cmn_err(CE_WARN, "inconsistent sglist rem %d", resid);
+ if (iov != &iov1[0])
+ kmem_free(iov, iovcnt * sizeof (*iov));
+ return (STMF_FAILURE);
+ }
+ /* Setup the uio struct */
+ uio.uio_iov = iov;
+ uio.uio_iovcnt = iovcnt;
+ uio.uio_loffset = laddr;
+ uio.uio_segflg = (short)UIO_SYSSPACE;
+ uio.uio_resid = (uint64_t)len;
+ uio.uio_llimit = RLIM64_INFINITY;
+
+ /* start the accounting clock */
+ stmf_lu_xfer_start(task);
+ xfer_start = gethrtime();
+ if (is_read == B_TRUE) {
+ uio.uio_fmode = FREAD;
+ uio.uio_extflg = UIO_COPY_CACHED;
+ DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
+ scsi_task_t *, task);
+
+ /* Fetch the data */
+ ret = sbd_zvol_copy_read(sl, &uio);
+
+ DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
+ scsi_task_t *, task);
+ } else {
+ uio.uio_fmode = FWRITE;
+ uio.uio_extflg = UIO_COPY_DEFAULT;
+ DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
+ scsi_task_t *, task);
+
+ flags = (commit) ? ZVIO_COMMIT : 0;
+ /* Write the data */
+ ret = sbd_zvol_copy_write(sl, &uio, flags);
+
+ DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
+ uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
+ scsi_task_t *, task);
+ }
+ /* finalize accounting */
+ stmf_lu_xfer_done(task, is_read, (uint64_t)len,
+ (gethrtime() - xfer_start));
+
+ if (iov != &iov1[0])
+ kmem_free(iov, iovcnt * sizeof (*iov));
+ if (ret != 0) {
+ /* Backend I/O error */
+ return (STMF_FAILURE);
+ }
+ return (STMF_SUCCESS);
+}
+
void
sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
@@ -285,6 +867,95 @@
return;
}
+ /*
+ * Determine if this read can directly use DMU buffers.
+ */
+ if (sbd_zcopy & (2|1) && /* Debug switch */
+ initial_dbuf == NULL && /* No PP buffer passed in */
+ sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */
+ (task->task_additional_flags &
+ TASK_AF_ACCEPT_LU_DBUF)) /* PP allows it */
+ {
+ /*
+ * Reduced copy path
+ */
+ uint32_t copy_threshold, minsize;
+ int ret;
+
+ /*
+ * The sl_access_state_lock will be held shared
+ * for the entire request and released when all
+ * dbufs have completed.
+ */
+ rw_enter(&sl->sl_access_state_lock, RW_READER);
+ if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
+ rw_exit(&sl->sl_access_state_lock);
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ return;
+ }
+
+ /*
+ * Check if setup is more expensive than copying the data.
+ *
+ * Use the global over-ride sbd_copy_threshold if set.
+ */
+ copy_threshold = (sbd_copy_threshold > 0) ?
+ sbd_copy_threshold : task->task_copy_threshold;
+ minsize = len;
+ if (len < copy_threshold &&
+ (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {
+
+ ret = sbd_copy_rdwr(task, laddr, dbuf,
+ SBD_CMD_SCSI_READ, 0);
+ /* done with the backend */
+ rw_exit(&sl->sl_access_state_lock);
+ if (ret != 0) {
+ /* backend error */
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ } else {
+ /* send along good data */
+ dbuf->db_relative_offset = 0;
+ dbuf->db_data_size = len;
+ dbuf->db_flags = DB_SEND_STATUS_GOOD |
+ DB_DIRECTION_TO_RPORT;
+ /* XXX keep for FW? */
+ DTRACE_PROBE4(sbd__xfer,
+ struct scsi_task *, task,
+ struct stmf_data_buf *, dbuf,
+ uint64_t, laddr, uint32_t, len);
+ (void) stmf_xfer_data(task, dbuf,
+ STMF_IOF_LU_DONE);
+ }
+ return;
+ }
+
+ /* committed to reduced copy */
+ if (task->task_lu_private) {
+ scmd = (sbd_cmd_t *)task->task_lu_private;
+ } else {
+ scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
+ KM_SLEEP);
+ task->task_lu_private = scmd;
+ }
+ /*
+ * Setup scmd to track read progress.
+ */
+ scmd->flags = SBD_SCSI_CMD_ACTIVE;
+ scmd->cmd_type = SBD_CMD_SCSI_READ;
+ scmd->nbufs = 0;
+ scmd->addr = laddr;
+ scmd->len = len;
+ scmd->current_ro = 0;
+
+ /*
+ * Kick-off the read.
+ */
+ sbd_do_sgl_read_xfer(task, scmd, 1);
+ return;
+ }
+
if (initial_dbuf == NULL) {
uint32_t maxsize, minsize, old_minsize;
@@ -311,6 +982,10 @@
dbuf->db_data_size = len;
dbuf->db_flags = DB_SEND_STATUS_GOOD |
DB_DIRECTION_TO_RPORT;
+ /* XXX keep for FW? */
+ DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
+ struct stmf_data_buf *, dbuf,
+ uint64_t, laddr, uint32_t, len);
(void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
} else {
stmf_scsilib_send_status(task, STATUS_CHECK,
@@ -403,6 +1078,224 @@
}
void
+sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ sbd_zvol_io_t *zvio;
+ int ret;
+ uint32_t xfer_len, max_len, first_len;
+ stmf_status_t xstat;
+ stmf_data_buf_t *dbuf;
+ uint_t nblks;
+ uint64_t blksize = sl->sl_blksize;
+ uint64_t offset;
+ size_t db_private_sz;
+ uintptr_t pad;
+
+ ASSERT(rw_read_held(&sl->sl_access_state_lock));
+ ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
+
+ /*
+ * Calculate the limits on xfer_len to the minimum of:
+ * - task limit
+ * - lun limit
+ * - sbd global limit if set
+ * - first xfer limit if set
+ *
+ * First, protect against silly over-ride value
+ */
+ if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
+ cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
+ sbd_max_xfer_len);
+ sbd_max_xfer_len = 0;
+ }
+ if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
+ cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
+ sbd_1st_xfer_len);
+ sbd_1st_xfer_len = 0;
+ }
+
+ max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
+ if (sbd_max_xfer_len)
+ max_len = MIN(max_len, sbd_max_xfer_len);
+ /*
+ * Special case the first xfer if hints are set.
+ */
+ if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
+ /* global over-ride has precedence */
+ if (sbd_1st_xfer_len)
+ first_len = sbd_1st_xfer_len;
+ else
+ first_len = task->task_1st_xfer_len;
+ } else {
+ first_len = 0;
+ }
+
+
+ while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
+
+ xfer_len = MIN(max_len, scmd->len);
+ if (first_len) {
+ xfer_len = MIN(xfer_len, first_len);
+ first_len = 0;
+ }
+ if (xfer_len < scmd->len) {
+ /*
+ * Attempt to end xfer on a block boundary.
+ * The only way this does not happen is if the
+ * xfer_len is small enough to stay contained
+ * within the same block.
+ */
+ uint64_t xfer_offset, xfer_aligned_end;
+
+ xfer_offset = scmd->addr + scmd->current_ro;
+ xfer_aligned_end =
+ P2ALIGN(xfer_offset+xfer_len, blksize);
+ if (xfer_aligned_end > xfer_offset)
+ xfer_len = xfer_aligned_end - xfer_offset;
+ }
+ /*
+ * Allocate object to track the write and reserve
+ * enough space for scatter/gather list.
+ */
+ offset = scmd->addr + scmd->current_ro;
+ nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
+ db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
+ (nblks * sizeof (stmf_sglist_ent_t));
+ dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
+ AF_DONTZERO);
+
+ /*
+ * Setup the dbuf
+ *
+ * XXX Framework does not handle variable length sglists
+ * properly, so setup db_lu_private and db_port_private
+ * fields here. db_stmf_private is properly set for
+ * calls to stmf_free.
+ */
+ if (dbuf->db_port_private == NULL) {
+ /*
+ * XXX Framework assigns space to PP after db_sglist[0]
+ */
+ cmn_err(CE_PANIC, "db_port_private == NULL");
+ }
+ pad = (uintptr_t)&dbuf->db_sglist[nblks];
+ dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
+ dbuf->db_port_private = NULL;
+ dbuf->db_buf_size = xfer_len;
+ dbuf->db_data_size = xfer_len;
+ dbuf->db_relative_offset = scmd->current_ro;
+ dbuf->db_sglist_length = (uint16_t)nblks;
+ dbuf->db_xfer_status = 0;
+ dbuf->db_handle = 0;
+ dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
+ DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
+
+ zvio = dbuf->db_lu_private;
+ zvio->zvio_offset = offset;
+
+ /* get the buffers */
+ ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
+ if (ret != 0) {
+ /*
+ * Could not allocate buffers from the backend;
+ * treat it like an IO error.
+ */
+ stmf_free(dbuf);
+ scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ if (scmd->nbufs == 0) {
+ /*
+ * Nothing queued, so no completions coming
+ */
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ rw_exit(&sl->sl_access_state_lock);
+ }
+ /*
+ * Completions of previous buffers will cleanup.
+ */
+ return;
+ }
+
+ /*
+ * Allow PP to do setup
+ */
+ xstat = stmf_setup_dbuf(task, dbuf, 0);
+ if (xstat != STMF_SUCCESS) {
+ /*
+ * This could happen if the driver cannot get the
+ * DDI resources it needs for this request.
+ * If other dbufs are queued, try again when the next
+ * one completes, otherwise give up.
+ */
+ sbd_zvol_rele_write_bufs_abort(sl, dbuf);
+ stmf_free(dbuf);
+ if (scmd->nbufs > 0) {
+ /* completion of previous dbuf will retry */
+ return;
+ }
+ /*
+ * Done with this command.
+ */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if (first_xfer)
+ stmf_scsilib_send_status(task, STATUS_QFULL, 0);
+ else
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ rw_exit(&sl->sl_access_state_lock);
+ return;
+ }
+
+ /*
+ * dbuf is now queued on task
+ */
+ scmd->nbufs++;
+
+ xstat = stmf_xfer_data(task, dbuf, 0);
+ switch (xstat) {
+ case STMF_SUCCESS:
+ break;
+ case STMF_BUSY:
+ /*
+ * The dbuf is queued on the task, but unknown
+ * to the PP, thus no completion will occur.
+ */
+ sbd_zvol_rele_write_bufs_abort(sl, dbuf);
+ stmf_teardown_dbuf(task, dbuf);
+ stmf_free(dbuf);
+ scmd->nbufs--;
+ if (scmd->nbufs > 0) {
+ /* completion of previous dbuf will retry */
+ return;
+ }
+ /*
+ * Done with this command.
+ */
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if (first_xfer)
+ stmf_scsilib_send_status(task, STATUS_QFULL, 0);
+ else
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ rw_exit(&sl->sl_access_state_lock);
+ return;
+ case STMF_ABORTED:
+ /*
+ * Completion code will cleanup.
+ */
+ scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ return;
+ }
+ /*
+ * Update the xfer progress.
+ */
+ scmd->len -= xfer_len;
+ scmd->current_ro += xfer_len;
+ }
+}
+
+void
sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
@@ -440,19 +1333,36 @@
laddr = scmd->addr + dbuf->db_relative_offset;
- for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
- (ndx < dbuf->db_sglist_length); ndx++) {
- iolen = min(dbuf->db_data_size - buflen,
- dbuf->db_sglist[ndx].seg_length);
- if (iolen == 0)
- break;
- if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
- dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
+ /*
+ * If this is going to a zvol, use the direct call to
+ * sbd_zvol_copy_{read,write}. The direct call interface is
+ * restricted here to PPs that accept sglists, although the interface
+ * itself does not require it.
+ */
+ if (sl->sl_flags & SL_CALL_ZVOL &&
+ (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
+ (sbd_zcopy & (4|1))) {
+ int commit;
+
+ commit = (scmd->len == 0 && scmd->nbufs == 0);
+ if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
+ commit) != STMF_SUCCESS)
scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
- break;
+ buflen = dbuf->db_data_size;
+ } else {
+ for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
+ (ndx < dbuf->db_sglist_length); ndx++) {
+ iolen = min(dbuf->db_data_size - buflen,
+ dbuf->db_sglist[ndx].seg_length);
+ if (iolen == 0)
+ break;
+ if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
+ dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
+ scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ break;
+ }
+ buflen += iolen;
+ laddr += (uint64_t)iolen;
}
- buflen += iolen;
- laddr += (uint64_t)iolen;
}
task->task_nbytes_transferred += buflen;
WRITE_XFER_DONE:
@@ -487,6 +1397,36 @@
sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
}
+/*
+ * Return true if copy avoidance is beneficial.
+ */
+static int
+sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
+ uint64_t blksize)
+{
+ /*
+ * If there is a global copy threshold over-ride, use it.
+ * Otherwise use the PP value with the caveat that at least
+ * 1/2 the data must avoid being copied to be useful.
+ */
+ if (sbd_copy_threshold > 0) {
+ return (len >= sbd_copy_threshold);
+ } else {
+ uint64_t no_copy_span;
+
+ /* sub-blocksize writes always copy */
+ if (len < task->task_copy_threshold || len < blksize)
+ return (0);
+ /*
+ * Calculate amount of data that will avoid the copy path.
+ * The calculation is only valid if len >= blksize.
+ */
+ no_copy_span = P2ALIGN(laddr+len, blksize) -
+ P2ROUNDUP(laddr, blksize);
+ return (no_copy_span >= len/2);
+ }
+}
+
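To make the span arithmetic above concrete (numbers purely for illustration): with blksize = 8K, a 64K write at laddr = 4K yields

    no_copy_span = P2ALIGN(68K, 8K) - P2ROUNDUP(4K, 8K)
                 = 64K - 8K = 56K  >= len/2 (32K)  -> zcopy useful

A 16K write at the same offset leaves exactly one whole block (8K >= len/2 = 8K) and still qualifies, while any write shorter than one block bails out early and takes the copy path.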
void
sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
@@ -559,6 +1499,47 @@
return;
}
+ if (sbd_zcopy & (4|1) && /* Debug switch */
+ initial_dbuf == NULL && /* No PP buf passed in */
+ sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */
+ (task->task_additional_flags &
+ TASK_AF_ACCEPT_LU_DBUF) && /* PP allows it */
+ sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
+
+ /*
+ * XXX Note that disallowing initial_dbuf will eliminate
+ * iSCSI from participating. For small writes, that is
+ * probably ok. For large writes, it may be best to just
+ * copy the data from the initial dbuf and use zcopy for
+ * the rest.
+ */
+ rw_enter(&sl->sl_access_state_lock, RW_READER);
+ if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
+ rw_exit(&sl->sl_access_state_lock);
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_READ_ERROR);
+ return;
+ }
+ /*
+ * Setup scmd to track the write progress.
+ */
+ if (task->task_lu_private) {
+ scmd = (sbd_cmd_t *)task->task_lu_private;
+ } else {
+ scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
+ KM_SLEEP);
+ task->task_lu_private = scmd;
+ }
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
+ scmd->cmd_type = SBD_CMD_SCSI_WRITE;
+ scmd->nbufs = 0;
+ scmd->addr = laddr;
+ scmd->len = len;
+ scmd->current_ro = 0;
+ sbd_do_sgl_write_xfer(task, scmd, 1);
+ return;
+ }
+
if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
if (initial_dbuf->db_data_size > len) {
if (initial_dbuf->db_data_size >
@@ -589,7 +1570,7 @@
if (do_immediate_data) {
/*
- * Accout for data passed in this write command
+ * Account for data passed in this write command
*/
(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
scmd->len -= dbuf->db_data_size;
@@ -1916,9 +2897,30 @@
void
sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
{
- sbd_cmd_t *scmd = NULL;
-
- scmd = (sbd_cmd_t *)task->task_lu_private;
+ sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+
+ if (dbuf->db_flags & DB_LU_DATA_BUF) {
+ /*
+ * Buffers passed in from the LU always complete
+ * even if the task is no longer active.
+ */
+ ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
+ ASSERT(scmd);
+ switch (scmd->cmd_type) {
+ case (SBD_CMD_SCSI_READ):
+ sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
+ break;
+ case (SBD_CMD_SCSI_WRITE):
+ sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
+ break;
+ default:
+ cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
+ (void *)task);
+ break;
+ }
+ return;
+ }
+
if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
return;
@@ -2005,6 +3007,38 @@
return (STMF_NOT_FOUND);
}
+/*
+ * This function is called during task clean-up if the
+ * DB_LU_DATA_BUF flag is set on the dbuf. This should only be called for
+ * abort processing after sbd_abort has been called for the task.
+ */
+void
+sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
+{
+ sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+
+ ASSERT(dbuf->db_lu_private);
+ ASSERT(scmd && scmd->nbufs > 0);
+ ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+ ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
+ ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
+
+ if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
+ sbd_zvol_rele_read_bufs(sl, dbuf);
+ } else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
+ sbd_zvol_rele_write_bufs_abort(sl, dbuf);
+ } else {
+ cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
+ scmd->cmd_type, (void *)task);
+ }
+ if (--scmd->nbufs == 0)
+ rw_exit(&sl->sl_access_state_lock);
+ stmf_teardown_dbuf(task, dbuf);
+ stmf_free(dbuf);
+}
+
/* ARGSUSED */
void
sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c Wed May 05 10:23:23 2010 -0700
@@ -0,0 +1,460 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/scsi/scsi.h>
+#include <sys/scsi/impl/scsi_reset_notify.h>
+#include <sys/scsi/generic/mode.h>
+#include <sys/disp.h>
+#include <sys/byteorder.h>
+#include <sys/atomic.h>
+#include <sys/sdt.h>
+#include <sys/dkio.h>
+#include <sys/dmu.h>
+#include <sys/arc.h>
+#include <sys/zvol.h>
+#include <sys/zfs_rlock.h>
+
+#include <stmf.h>
+#include <lpif.h>
+#include <portif.h>
+#include <stmf_ioctl.h>
+#include <stmf_sbd.h>
+#include <stmf_sbd_ioctl.h>
+#include <sbd_impl.h>
+
+
+/*
+ * This file contains direct calls into the zfs module.
+ * These functions mimic zvol_read and zvol_write except pointers
+ * to the data buffers are passed instead of copying the data itself.
+ *
+ * zfs internal interfaces referenced here:
+ *
+ * FUNCTIONS
+ * dmu_buf_hold_array_by_bonus()
+ * dmu_buf_rele_array()
+ *
+ * dmu_request_arcbuf()
+ * dmu_assign_arcbuf()
+ * dmu_return_arcbuf()
+ * arc_buf_size()
+ *
+ * dmu_tx_create()
+ * dmu_tx_hold_write()
+ * dmu_tx_assign()
+ * dmu_tx_commit(tx)
+ * dmu_tx_abort(tx)
+ * zil_commit()
+ *
+ * zfs_range_lock()
+ * zfs_range_unlock()
+ *
+ * zvol_log_write()
+ *
+ * dmu_read_uio()
+ * dmu_write_uio()
+ * MINOR DATA
+ * zv_volsize
+ * zv_volblocksize
+ * zv_flags - for WCE
+ * zv_objset - dmu_tx_create
+ * zv_zilog - zil_commit
+ * zv_znode - zfs_range_lock
+ * zv_dbuf - dmu_buf_hold_array_by_bonus, dmu_request_arcbuf
+ * GLOBAL DATA
+ * zvol_maxphys
+ */
+
+/*
+ * Take direct control of the volume instead of using the driver
+ * interfaces provided by zvol.c. Gather parameters and handles
+ * needed to make direct calls into zfs/dmu/zvol. The driver is
+ * opened exclusively at this point, so these parameters cannot change.
+ *
+ * NOTE: the object size and WCE can change while the device
+ * is open, so they must be fetched for every operation.
+ */
+int
+sbd_zvol_get_volume_params(sbd_lu_t *sl)
+{
+ int ret;
+
+ ret = zvol_get_volume_params(sl->sl_zvol_minor,
+ &sl->sl_blksize, /* volume block size */
+ &sl->sl_max_xfer_len, /* max data chunk size */
+ &sl->sl_zvol_minor_hdl, /* minor soft state */
+ &sl->sl_zvol_objset_hdl, /* dmu_tx_create */
+ &sl->sl_zvol_zil_hdl, /* zil_commit */
+ &sl->sl_zvol_rl_hdl, /* zfs_range_lock */
+ &sl->sl_zvol_bonus_hdl); /* dmu_buf_hold_array_by_bonus, */
+ /* dmu_request_arcbuf, */
+ /* dmu_assign_arcbuf */
+
+ if (ret == 0 && sl->sl_blksize < MMU_PAGESIZE) {
+ cmn_err(CE_NOTE, "COMSTAR reduced copy disabled due to "
+ "small zvol blocksize (%d)\n", (int)sl->sl_blksize);
+ ret = ENOTSUP;
+ }
+
+ return (ret);
+}
+
+/*
+ * Return the number of elements in a scatter/gather list required for
+ * the given span in the zvol. Elements are 1:1 with zvol blocks.
+ */
+uint32_t
+sbd_zvol_numsegs(sbd_lu_t *sl, uint64_t off, uint32_t len)
+{
+ uint64_t blksz = sl->sl_blksize;
+ uint64_t endoff = off + len;
+ uint64_t numsegs;
+
+ numsegs = (P2ROUNDUP(endoff, blksz) - P2ALIGN(off, blksz)) / blksz;
+ return ((uint32_t)numsegs);
+}
+
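For example, with blksz = 8K a 16K span starting at off = 4K has endoff = 20K, so

    numsegs = (P2ROUNDUP(20K, 8K) - P2ALIGN(4K, 8K)) / 8K
            = (24K - 0) / 8K = 3

covering the three blocks [0, 8K), [8K, 16K) and [16K, 24K) that the span touches, one sglist element per block.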
+/*
+ * Return an array of dmu_buf_t pointers for the requested range.
+ * The dmu buffers are either in cache or read in synchronously.
+ * Fill in the dbuf sglist from the dmu_buf_t array.
+ */
+static void *RDTAG = "sbd_zvol_read";
+
+int
+sbd_zvol_alloc_read_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+ rl_t *rl;
+ int numbufs, error;
+ uint64_t len = dbuf->db_data_size;
+ uint64_t offset = zvio->zvio_offset;
+ dmu_buf_t **dbpp, *dbp;
+
+ /* Make sure request is reasonable */
+ if (len > sl->sl_max_xfer_len)
+ return (E2BIG);
+ if (offset + len > zvol_get_volume_size(sl->sl_zvol_minor_hdl))
+ return (EIO);
+
+ /*
+ * The range lock is only held until the dmu buffers are read in and
+ * held, not during the caller's use of the data.
+ */
+ rl = zfs_range_lock(sl->sl_zvol_rl_hdl, offset, len, RL_READER);
+
+ error = dmu_buf_hold_array_by_bonus(sl->sl_zvol_bonus_hdl, offset,
+ len, TRUE, RDTAG, &numbufs, &dbpp);
+
+ zfs_range_unlock(rl);
+
+ if (error == ECKSUM)
+ error = EIO;
+
+ if (error == 0) {
+ /*
+ * Fill in db_sglist from the dmu_buf_t array.
+ */
+ int i;
+ stmf_sglist_ent_t *sgl;
+ uint64_t odiff, seglen;
+
+ zvio->zvio_dbp = dbpp;
+ /* make sure db_sglist is large enough */
+ if (dbuf->db_sglist_length != numbufs) {
+ cmn_err(CE_PANIC, "wrong size sglist: dbuf %d != %d\n",
+ dbuf->db_sglist_length, numbufs);
+ }
+
+ sgl = &dbuf->db_sglist[0];
+ for (i = 0; i < numbufs; i++) {
+ dbp = dbpp[i];
+ odiff = offset - dbp->db_offset;
+ ASSERT(odiff == 0 || i == 0);
+ sgl->seg_addr = (uint8_t *)dbp->db_data + odiff;
+ seglen = MIN(len, dbp->db_size - odiff);
+ sgl->seg_length = (uint32_t)seglen;
+ offset += seglen;
+ len -= seglen;
+ sgl++;
+ }
+ ASSERT(len == 0);
+
+ }
+ return (error);
+}
+
+/*
+ * Release a dmu_buf_t array.
+ */
+/*ARGSUSED*/
+void
+sbd_zvol_rele_read_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+
+ ASSERT(zvio->zvio_dbp);
+ ASSERT(dbuf->db_sglist_length);
+
+ dmu_buf_rele_array(zvio->zvio_dbp, (int)dbuf->db_sglist_length, RDTAG);
+}
+
+/*
+ * Allocate enough loaned arc buffers for the requested region.
+ * Mimic the handling of the dmu_buf_t array used for reads as closely
+ * as possible even though the arc_buf_t's are anonymous until released.
+ * The buffers will match the zvol object block sizes and alignments
+ * such that a data copy may be avoided when the buffers are assigned.
+ */
+int
+sbd_zvol_alloc_write_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+ int blkshift, numbufs, i;
+ uint64_t blksize;
+ arc_buf_t **abp;
+ stmf_sglist_ent_t *sgl;
+ uint64_t len = dbuf->db_data_size;
+ uint64_t offset = zvio->zvio_offset;
+
+ /* Make sure request is reasonable */
+ if (len > sl->sl_max_xfer_len)
+ return (E2BIG);
+ if (offset + len > zvol_get_volume_size(sl->sl_zvol_minor_hdl))
+ return (EIO);
+
+ /*
+ * Break up the request into chunks to match
+ * the volume block size. Only full, aligned
+ * buffers will avoid the data copy in the dmu.
+ */
+ /*
+ * calculate how many dbufs are needed
+ */
+ blksize = sl->sl_blksize;
+ ASSERT(ISP2(blksize));
+ blkshift = highbit(blksize - 1);
+ /*
+ * taken from dmu_buf_hold_array_by_dnode()
+ */
+ numbufs = (P2ROUNDUP(offset+len, 1ULL<<blkshift) -
+ P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+ if (dbuf->db_sglist_length != numbufs) {
+ cmn_err(CE_PANIC, "wrong size sglist: dbuf %d != %d\n",
+ dbuf->db_sglist_length, numbufs);
+ }
+ /*
+ * allocate a holder for the needed arc_buf pointers
+ */
+ abp = kmem_alloc(sizeof (arc_buf_t *) * numbufs, KM_SLEEP);
+ /*
+ * The write operation uses loaned arc buffers so that
+ * the xfer_data is done outside of a dmu transaction.
+ * These buffers will exactly match the request, unlike
+ * the dmu buffers obtained from the read operation.
+ */
+ /*
+ * allocate the arc buffers and fill in the stmf sglist
+ */
+ sgl = &dbuf->db_sglist[0];
+ for (i = 0; i < numbufs; i++) {
+ uint64_t seglen;
+
+ /* first block may not be aligned */
+ seglen = P2NPHASE(offset, blksize);
+ if (seglen == 0)
+ seglen = blksize;
+ seglen = MIN(seglen, len);
+ abp[i] = dmu_request_arcbuf(sl->sl_zvol_bonus_hdl, (int)seglen);
+ ASSERT(arc_buf_size(abp[i]) == (int)seglen);
+ sgl->seg_addr = abp[i]->b_data;
+ sgl->seg_length = (uint32_t)seglen;
+ sgl++;
+ offset += seglen;
+ len -= seglen;
+ }
+ ASSERT(len == 0);
+
+ zvio->zvio_abp = abp;
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+sbd_zvol_rele_write_bufs_abort(sbd_lu_t *sl, stmf_data_buf_t *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+ int i;
+ arc_buf_t **abp = zvio->zvio_abp;
+
+ /* free arcbufs */
+ for (i = 0; i < dbuf->db_sglist_length; i++)
+ dmu_return_arcbuf(*abp++);
+ kmem_free(zvio->zvio_abp,
+ sizeof (arc_buf_t *) * dbuf->db_sglist_length);
+ zvio->zvio_abp = NULL;
+}
+
+/*
+ * Release the arc_buf_t array allocated above and handle these cases :
+ *
+ * flags == 0 - create transaction and assign all arc bufs to offsets
+ * flags == ZVIO_COMMIT - same as above and commit to zil on sync devices
+ */
+int
+sbd_zvol_rele_write_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf)
+{
+ sbd_zvol_io_t *zvio = dbuf->db_lu_private;
+ dmu_tx_t *tx;
+ int sync, i, error;
+ rl_t *rl;
+ arc_buf_t **abp = zvio->zvio_abp;
+ int flags = zvio->zvio_flags;
+ uint64_t toffset, offset = zvio->zvio_offset;
+ uint64_t resid, len = dbuf->db_data_size;
+
+ ASSERT(flags == 0 || flags == ZVIO_COMMIT || flags == ZVIO_ABORT);
+
+ rl = zfs_range_lock(sl->sl_zvol_rl_hdl, offset, len, RL_WRITER);
+
+ tx = dmu_tx_create(sl->sl_zvol_objset_hdl);
+ dmu_tx_hold_write(tx, ZVOL_OBJ, offset, (int)len);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+
+ if (error) {
+ dmu_tx_abort(tx);
+ zfs_range_unlock(rl);
+ sbd_zvol_rele_write_bufs_abort(sl, dbuf);
+ return (error);
+ }
+
+ toffset = offset;
+ resid = len;
+ for (i = 0; i < dbuf->db_sglist_length; i++) {
+ arc_buf_t *abuf;
+ int size;
+
+ abuf = abp[i];
+ size = arc_buf_size(abuf);
+ dmu_assign_arcbuf(sl->sl_zvol_bonus_hdl, toffset, abuf, tx);
+ toffset += size;
+ resid -= size;
+ }
+ ASSERT(resid == 0);
+
+ sync = !zvol_get_volume_wce(sl->sl_zvol_minor_hdl);
+ zvol_log_write_minor(sl->sl_zvol_minor_hdl, tx, offset,
+ (ssize_t)len, sync);
+ dmu_tx_commit(tx);
+ zfs_range_unlock(rl);
+ kmem_free(zvio->zvio_abp,
+ sizeof (arc_buf_t *) * dbuf->db_sglist_length);
+ zvio->zvio_abp = NULL;
+ if (sync && (flags & ZVIO_COMMIT))
+ zil_commit(sl->sl_zvol_zil_hdl, UINT64_MAX, ZVOL_OBJ);
+ return (0);
+}
+
+/*
+ * Copy interface for callers using direct zvol access.
+ * Very similar to zvol_read but the uio may have multiple iovec entries.
+ */
+int
+sbd_zvol_copy_read(sbd_lu_t *sl, uio_t *uio)
+{
+ int error;
+ rl_t *rl;
+ uint64_t len = (uint64_t)uio->uio_resid;
+ uint64_t offset = (uint64_t)uio->uio_loffset;
+
+ /* Make sure request is reasonable */
+ if (len > sl->sl_max_xfer_len)
+ return (E2BIG);
+ if (offset + len > zvol_get_volume_size(sl->sl_zvol_minor_hdl))
+ return (EIO);
+
+ rl = zfs_range_lock(sl->sl_zvol_rl_hdl, offset, len, RL_READER);
+
+ error = dmu_read_uio(sl->sl_zvol_objset_hdl, ZVOL_OBJ, uio, len);
+
+ zfs_range_unlock(rl);
+ if (error == ECKSUM)
+ error = EIO;
+ return (error);
+}
+
+/*
+ * Copy interface for callers using direct zvol access.
+ * Very similar to zvol_write but the uio may have multiple iovec entries.
+ */
+int
+sbd_zvol_copy_write(sbd_lu_t *sl, uio_t *uio, int flags)
+{
+ rl_t *rl;
+ dmu_tx_t *tx;
+ int error, sync;
+ uint64_t len = (uint64_t)uio->uio_resid;
+ uint64_t offset = (uint64_t)uio->uio_loffset;
+
+ ASSERT(flags == 0 || flags == ZVIO_COMMIT);
+
+ /* Make sure request is reasonable */
+ if (len > sl->sl_max_xfer_len)
+ return (E2BIG);
+ if (offset + len > zvol_get_volume_size(sl->sl_zvol_minor_hdl))
+ return (EIO);
+
+ rl = zfs_range_lock(sl->sl_zvol_rl_hdl, offset, len, RL_WRITER);
+
+ sync = !zvol_get_volume_wce(sl->sl_zvol_minor_hdl);
+
+ tx = dmu_tx_create(sl->sl_zvol_objset_hdl);
+ dmu_tx_hold_write(tx, ZVOL_OBJ, offset, (int)uio->uio_resid);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+ /*
+ * XXX use the new bonus handle entry.
+ */
+ error = dmu_write_uio(sl->sl_zvol_objset_hdl, ZVOL_OBJ,
+ uio, len, tx);
+ if (error == 0) {
+ zvol_log_write_minor(sl->sl_zvol_minor_hdl, tx, offset,
+ (ssize_t)len, sync);
+ }
+ dmu_tx_commit(tx);
+ }
+ zfs_range_unlock(rl);
+ if (sync && (flags & ZVIO_COMMIT))
+ zil_commit(sl->sl_zvol_zil_hdl, UINT64_MAX, ZVOL_OBJ);
+ if (error == ECKSUM)
+ error = EIO;
+ return (error);
+}
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _STMF_SBD_H
@@ -221,6 +220,13 @@
/* zfs metadata */
krwlock_t sl_zfs_meta_lock;
char *sl_zfs_meta;
+ minor_t sl_zvol_minor; /* for direct zvol calls */
+ /* opaque handles for zvol direct calls */
+ void *sl_zvol_minor_hdl;
+ void *sl_zvol_objset_hdl;
+ void *sl_zvol_zil_hdl;
+ void *sl_zvol_rl_hdl;
+ void *sl_zvol_bonus_hdl;
/* Backing store */
char *sl_data_filename;
@@ -230,6 +236,8 @@
uint64_t sl_data_readable_size; /* read() fails after this */
uint64_t sl_data_offset; /* After the metadata,if any */
uint64_t sl_lu_size; /* READ CAPACITY size */
+ uint64_t sl_blksize; /* used for zvols */
+ uint64_t sl_max_xfer_len; /* used for zvols */
struct sbd_it_data *sl_it_list;
struct sbd_pgr *sl_pgr;
@@ -258,6 +266,7 @@
#define SL_ZFS_META 0x10000
#define SL_WRITEBACK_CACHE_SET_UNSUPPORTED 0x20000
#define SL_FLUSH_ON_DISABLED_WRITECACHE 0x40000
+#define SL_CALL_ZVOL 0x80000
/*
* sl_trans_op. LU is undergoing some transition and this field
--- a/usr/src/uts/common/io/comstar/port/fct/fct.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/fct/fct.c Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/conf.h>
@@ -1096,6 +1095,27 @@
pminsize, flags));
}
+stmf_status_t
+fct_setup_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t flags)
+{
+ fct_local_port_t *port = (fct_local_port_t *)
+ task->task_lport->lport_port_private;
+
+ ASSERT(port->port_fds->fds_setup_dbuf != NULL);
+ if (port->port_fds->fds_setup_dbuf == NULL)
+ return (STMF_FAILURE);
+
+ return (port->port_fds->fds_setup_dbuf(port, dbuf, flags));
+}
+
+void
+fct_teardown_dbuf(stmf_dbuf_store_t *ds, stmf_data_buf_t *dbuf)
+{
+ fct_dbuf_store_t *fds = ds->ds_port_private;
+
+ fds->fds_teardown_dbuf(fds, dbuf);
+}
+
void
fct_free_dbuf(stmf_dbuf_store_t *ds, stmf_data_buf_t *dbuf)
{
@@ -1199,6 +1219,8 @@
lport->lport_pp = port->port_pp;
port->port_fds->fds_ds->ds_alloc_data_buf = fct_alloc_dbuf;
port->port_fds->fds_ds->ds_free_data_buf = fct_free_dbuf;
+ port->port_fds->fds_ds->ds_setup_dbuf = fct_setup_dbuf;
+ port->port_fds->fds_ds->ds_teardown_dbuf = fct_teardown_dbuf;
lport->lport_ds = port->port_fds->fds_ds;
lport->lport_xfer_data = fct_xfer_scsi_data;
lport->lport_send_status = fct_send_scsi_status;
@@ -1711,6 +1733,8 @@
void
fct_post_rcvd_cmd(fct_cmd_t *cmd, stmf_data_buf_t *dbuf)
{
+ fct_dbuf_store_t *fds;
+
if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
fct_i_cmd_t *icmd = (fct_i_cmd_t *)cmd->cmd_fct_private;
fct_i_local_port_t *iport =
@@ -1739,6 +1763,30 @@
task->task_additional_flags |=
TASK_AF_PORT_LOAD_HIGH;
}
+ /*
+ * If the target driver accepts sglists, fill in task fields.
+ */
+ fds = cmd->cmd_port->port_fds;
+ if (fds->fds_setup_dbuf != NULL) {
+ task->task_additional_flags |= TASK_AF_ACCEPT_LU_DBUF;
+ task->task_copy_threshold = fds->fds_copy_threshold;
+ task->task_max_xfer_len = fds->fds_max_sgl_xfer_len;
+ /*
+ * A single stream load encounters a little extra
+ * latency if large xfers are done in 1 chunk.
+ * Give a hint to the LU that starting the xfer
+ * with a smaller chunk would be better in this case.
+ * For any other load, use maximum chunk size.
+ */
+ if (load == 1) {
+ /* estimate */
+ task->task_1st_xfer_len = 128*1024;
+ } else {
+ /* zero means no hint */
+ task->task_1st_xfer_len = 0;
+ }
+ }
+
stmf_post_task((scsi_task_t *)cmd->cmd_specific, dbuf);
atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_TRANSITION);
return;
--- a/usr/src/uts/common/io/comstar/port/qlt/qlt.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/qlt/qlt.c Wed May 05 10:23:23 2010 -0700
@@ -25,8 +25,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/conf.h>
@@ -1006,7 +1005,8 @@
port_attrs->supported_speed = PORT_SPEED_1G |
PORT_SPEED_2G | PORT_SPEED_4G;
if (qlt->qlt_25xx_chip)
- port_attrs->supported_speed |= PORT_SPEED_8G;
+ port_attrs->supported_speed = PORT_SPEED_2G | PORT_SPEED_4G |
+ PORT_SPEED_8G;
if (qlt->qlt_81xx_chip)
port_attrs->supported_speed = PORT_SPEED_10G;
@@ -1097,6 +1097,9 @@
if (qlt_dmem_init(qlt) != QLT_SUCCESS) {
return (FCT_FAILURE);
}
+ /* Initialize the ddi_dma_handle free pool */
+ qlt_dma_handle_pool_init(qlt);
+
port = (fct_local_port_t *)fct_alloc(FCT_STRUCT_LOCAL_PORT, 0, 0);
if (port == NULL) {
goto qlt_pstart_fail_1;
@@ -1108,6 +1111,10 @@
qlt->qlt_port = port;
fds->fds_alloc_data_buf = qlt_dmem_alloc;
fds->fds_free_data_buf = qlt_dmem_free;
+ fds->fds_setup_dbuf = qlt_dma_setup_dbuf;
+ fds->fds_teardown_dbuf = qlt_dma_teardown_dbuf;
+ fds->fds_max_sgl_xfer_len = QLT_DMA_SG_LIST_LENGTH * MMU_PAGESIZE;
+ fds->fds_copy_threshold = MMU_PAGESIZE;
fds->fds_fca_private = (void *)qlt;
/*
* Since we keep everything in the state struct and dont allocate any
@@ -1158,6 +1165,7 @@
fct_free(port);
qlt->qlt_port = NULL;
qlt_pstart_fail_1:
+ qlt_dma_handle_pool_fini(qlt);
qlt_dmem_fini(qlt);
return (QLT_FAILURE);
}
@@ -1175,6 +1183,7 @@
fct_free(qlt->qlt_port->port_fds);
fct_free(qlt->qlt_port);
qlt->qlt_port = NULL;
+ qlt_dma_handle_pool_fini(qlt);
qlt_dmem_fini(qlt);
return (QLT_SUCCESS);
}
@@ -1333,13 +1342,13 @@
DMEM_WR16(qlt, icb+0x74,
qlt81nvr->enode_mac[4] |
(qlt81nvr->enode_mac[5] << 8));
- } else {
- DMEM_WR32(qlt, icb+0x5c, BIT_11 | BIT_5 | BIT_4 |
- BIT_2 | BIT_1 | BIT_0);
- DMEM_WR32(qlt, icb+0x60, BIT_5);
- DMEM_WR32(qlt, icb+0x64, BIT_14 | BIT_8 | BIT_7 |
- BIT_4);
- }
+ } else {
+ DMEM_WR32(qlt, icb+0x5c, BIT_11 | BIT_5 | BIT_4 |
+ BIT_2 | BIT_1 | BIT_0);
+ DMEM_WR32(qlt, icb+0x60, BIT_5);
+ DMEM_WR32(qlt, icb+0x64, BIT_14 | BIT_8 | BIT_7 |
+ BIT_4);
+ }
if (qlt->qlt_81xx_chip) {
qlt_dmem_bctl_t *bctl;
@@ -2995,7 +3004,7 @@
caddr_t resp = &qlt->resp_ptr[qlt->resp_ndx_to_fw << 6];
uint32_t ent_cnt;
- ent_cnt = (uint32_t)(resp[1]);
+ ent_cnt = (uint32_t)(resp[0] == 0x51 ? resp[1] : 1);
if (ent_cnt > total_ent) {
break;
}
@@ -3375,11 +3384,12 @@
fct_status_t
qlt_xfer_scsi_data(fct_cmd_t *cmd, stmf_data_buf_t *dbuf, uint32_t ioflags)
{
- qlt_dmem_bctl_t *bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
- qlt_state_t *qlt = (qlt_state_t *)cmd->cmd_port->port_fca_private;
- qlt_cmd_t *qcmd = (qlt_cmd_t *)cmd->cmd_fca_private;
- uint8_t *req;
- uint16_t flags;
+ qlt_dmem_bctl_t *bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
+ qlt_state_t *qlt = (qlt_state_t *)cmd->cmd_port->port_fca_private;
+ qlt_cmd_t *qcmd = (qlt_cmd_t *)cmd->cmd_fca_private;
+ uint8_t *req, rcnt;
+ uint16_t flags;
+ uint16_t cookie_count;
if (dbuf->db_handle == 0)
qcmd->dbuf = dbuf;
@@ -3394,28 +3404,114 @@
if (dbuf->db_flags & DB_SEND_STATUS_GOOD)
flags = (uint16_t)(flags | BIT_15);
+ if (dbuf->db_flags & DB_LU_DATA_BUF) {
+ /*
+ * Data bufs from LU are in scatter/gather list format.
+ */
+ cookie_count = qlt_get_cookie_count(dbuf);
+ rcnt = qlt_get_iocb_count(cookie_count);
+ } else {
+ cookie_count = 1;
+ rcnt = 1;
+ }
mutex_enter(&qlt->req_lock);
- req = (uint8_t *)qlt_get_req_entries(qlt, 1);
+ req = (uint8_t *)qlt_get_req_entries(qlt, rcnt);
if (req == NULL) {
mutex_exit(&qlt->req_lock);
return (FCT_BUSY);
}
- bzero(req, IOCB_SIZE);
- req[0] = 0x12; req[1] = 0x1;
+ bzero(req, IOCB_SIZE); /* XXX needed ? */
+ req[0] = 0x12;
+ req[1] = rcnt;
req[2] = dbuf->db_handle;
QMEM_WR32(qlt, req+4, cmd->cmd_handle);
QMEM_WR16(qlt, req+8, cmd->cmd_rp->rp_handle);
QMEM_WR16(qlt, req+10, 60); /* 60 seconds timeout */
- req[12] = 1;
+ QMEM_WR16(qlt, req+12, cookie_count);
QMEM_WR32(qlt, req+0x10, cmd->cmd_rportid);
QMEM_WR32(qlt, req+0x14, qcmd->fw_xchg_addr);
QMEM_WR16(qlt, req+0x1A, flags);
QMEM_WR16(qlt, req+0x20, cmd->cmd_oxid);
QMEM_WR32(qlt, req+0x24, dbuf->db_relative_offset);
QMEM_WR32(qlt, req+0x2C, dbuf->db_data_size);
- QMEM_WR64(qlt, req+0x34, bctl->bctl_dev_addr);
- QMEM_WR32(qlt, req+0x34+8, dbuf->db_data_size);
- qlt_submit_req_entries(qlt, 1);
+ if (dbuf->db_flags & DB_LU_DATA_BUF) {
+ uint8_t *qptr; /* qlt continuation segs */
+ uint16_t cookie_resid;
+ uint16_t cont_segs;
+ ddi_dma_cookie_t cookie, *ckp;
+
+ /*
+ * See if the dma cookies are in simple array format.
+ */
+ ckp = qlt_get_cookie_array(dbuf);
+
+ /*
+ * Program the first segment into main record.
+ */
+ if (ckp) {
+ ASSERT(ckp->dmac_size);
+ QMEM_WR64(qlt, req+0x34, ckp->dmac_laddress);
+ QMEM_WR32(qlt, req+0x3c, ckp->dmac_size);
+ } else {
+ qlt_ddi_dma_nextcookie(dbuf, &cookie);
+ ASSERT(cookie.dmac_size);
+ QMEM_WR64(qlt, req+0x34, cookie.dmac_laddress);
+ QMEM_WR32(qlt, req+0x3c, cookie.dmac_size);
+ }
+ cookie_resid = cookie_count-1;
+
+ /*
+ * Program remaining segments into continuation records.
+ */
+ while (cookie_resid) {
+ req += IOCB_SIZE;
+ if (req >= (uint8_t *)qlt->resp_ptr) {
+ req = (uint8_t *)qlt->req_ptr;
+ }
+ req[0] = 0x0a;
+ req[1] = 1;
+ req[2] = req[3] = 0; /* tidy */
+ qptr = &req[4];
+ for (cont_segs = CONT_A64_DATA_SEGMENTS;
+ cont_segs && cookie_resid; cont_segs--) {
+
+ if (ckp) {
+ ++ckp; /* next cookie */
+ ASSERT(ckp->dmac_size != 0);
+ QMEM_WR64(qlt, qptr,
+ ckp->dmac_laddress);
+ qptr += 8; /* skip over laddress */
+ QMEM_WR32(qlt, qptr, ckp->dmac_size);
+ qptr += 4; /* skip over size */
+ } else {
+ qlt_ddi_dma_nextcookie(dbuf, &cookie);
+ ASSERT(cookie.dmac_size != 0);
+ QMEM_WR64(qlt, qptr,
+ cookie.dmac_laddress);
+ qptr += 8; /* skip over laddress */
+ QMEM_WR32(qlt, qptr, cookie.dmac_size);
+ qptr += 4; /* skip over size */
+ }
+ cookie_resid--;
+ }
+ /*
+ * zero unused remainder of IOCB
+ */
+ if (cont_segs) {
+ size_t resid;
+ resid = (size_t)((uintptr_t)(req+IOCB_SIZE) -
+ (uintptr_t)qptr);
+ ASSERT(resid < IOCB_SIZE);
+ bzero(qptr, resid);
+ }
+ }
+ } else {
+ /* Single, contiguous buffer */
+ QMEM_WR64(qlt, req+0x34, bctl->bctl_dev_addr);
+ QMEM_WR32(qlt, req+0x34+8, dbuf->db_data_size);
+ }
+
+ qlt_submit_req_entries(qlt, rcnt);
mutex_exit(&qlt->req_lock);
return (STMF_SUCCESS);
@@ -6074,7 +6170,7 @@
qlt_read_string_prop(qlt_state_t *qlt, char *prop, char **prop_val)
{
return (ddi_prop_lookup_string(DDI_DEV_T_ANY, qlt->dip,
- DDI_PROP_DONTPASS | DDI_PROP_CANSLEEP, prop, prop_val));
+ DDI_PROP_DONTPASS, prop, prop_val));
}
static int
--- a/usr/src/uts/common/io/comstar/port/qlt/qlt.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/qlt/qlt.h Wed May 05 10:23:23 2010 -0700
@@ -25,8 +25,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _QLT_H
@@ -240,6 +239,7 @@
} qlt_abts_cmd_t;
struct qlt_dmem_bucket;
+struct qlt_dma_handle_pool;
#define QLT_INTR_FIXED 0x1
#define QLT_INTR_MSI 0x2
@@ -259,6 +259,9 @@
fct_local_port_t *qlt_port;
struct qlt_dmem_bucket **dmem_buckets;
+ struct qlt_dma_handle_pool
+ *qlt_dma_handle_pool;
+
int instance;
uint8_t qlt_state:7,
qlt_state_not_acked:1;
@@ -351,7 +354,6 @@
uint64_t qlt_bumpbucket; /* bigger buffer supplied */
uint64_t qlt_pmintry;
uint64_t qlt_pmin_ok;
-
} qlt_state_t;
/*
--- a/usr/src/uts/common/io/comstar/port/qlt/qlt_dma.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/qlt/qlt_dma.c Wed May 05 10:23:23 2010 -0700
@@ -25,8 +25,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/conf.h>
@@ -42,6 +41,12 @@
#include <qlt.h>
#include <qlt_dma.h>
+/*
+ * Local Function Prototypes.
+ */
+static void
+qlt_dma_free_handles(qlt_state_t *qlt, qlt_dma_handle_t *first_handle);
+
#define BUF_COUNT_2K 2048
#define BUF_COUNT_8K 512
#define BUF_COUNT_64K 256
@@ -77,18 +82,18 @@
fct_status_t
qlt_dmem_init(qlt_state_t *qlt)
{
- qlt_dmem_bucket_t *p;
- qlt_dmem_bctl_t *bctl, *bc;
- qlt_dmem_bctl_t *prev;
- int ndx, i;
- uint32_t total_mem;
- uint8_t *addr;
- uint8_t *host_addr;
- uint64_t dev_addr;
- ddi_dma_cookie_t cookie;
- uint32_t ncookie;
- uint32_t bsize;
- size_t len;
+ qlt_dmem_bucket_t *p;
+ qlt_dmem_bctl_t *bctl, *bc;
+ qlt_dmem_bctl_t *prev;
+ int ndx, i;
+ uint32_t total_mem;
+ uint8_t *addr;
+ uint8_t *host_addr;
+ uint64_t dev_addr;
+ ddi_dma_cookie_t cookie;
+ uint32_t ncookie;
+ uint32_t bsize;
+ size_t len;
if (qlt->qlt_bucketcnt[0] != 0) {
bucket2K.dmem_nbufs = qlt->qlt_bucketcnt[0];
@@ -166,7 +171,7 @@
p->dmem_bctl_free_list = bctl;
p->dmem_nbufs_free = p->dmem_nbufs;
for (i = 0; i < p->dmem_nbufs; i++) {
- stmf_data_buf_t *db;
+ stmf_data_buf_t *db;
prev = bctl;
bctl->bctl_bucket = p;
bctl->bctl_buf = db = stmf_alloc(STMF_STRUCT_DATA_BUF,
@@ -216,6 +221,44 @@
}
void
+qlt_dma_handle_pool_init(qlt_state_t *qlt)
+{
+ qlt_dma_handle_pool_t *pool;
+
+ pool = kmem_zalloc(sizeof (*pool), KM_SLEEP);
+ mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
+ qlt->qlt_dma_handle_pool = pool;
+}
+
+void
+qlt_dma_handle_pool_fini(qlt_state_t *qlt)
+{
+ qlt_dma_handle_pool_t *pool;
+ qlt_dma_handle_t *handle, *next_handle;
+
+ pool = qlt->qlt_dma_handle_pool;
+ mutex_enter(&pool->pool_lock);
+ /*
+ * XXX Need to wait for free == total elements
+ * XXX Not sure how other driver shutdown stuff is done.
+ */
+ ASSERT(pool->num_free == pool->num_total);
+ if (pool->num_free != pool->num_total)
+ cmn_err(CE_WARN,
+ "num_free %d != num_total %d\n",
+ pool->num_free, pool->num_total);
+ handle = pool->free_list;
+ while (handle) {
+ next_handle = handle->next;
+ kmem_free(handle, sizeof (*handle));
+ handle = next_handle;
+ }
+ qlt->qlt_dma_handle_pool = NULL;
+ mutex_destroy(&pool->pool_lock);
+ kmem_free(pool, sizeof (*pool));
+}
+
+void
qlt_dmem_fini(qlt_state_t *qlt)
{
qlt_dmem_bucket_t *p;
@@ -254,7 +297,7 @@
/* ARGSUSED */
stmf_data_buf_t *
qlt_i_dmem_alloc(qlt_state_t *qlt, uint32_t size, uint32_t *pminsize,
- uint32_t flags)
+ uint32_t flags)
{
qlt_dmem_bucket_t *p;
qlt_dmem_bctl_t *bctl;
@@ -350,9 +393,13 @@
void
qlt_dmem_free(fct_dbuf_store_t *fds, stmf_data_buf_t *dbuf)
{
- qlt_dmem_bctl_t *bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
- qlt_dmem_bucket_t *p = bctl->bctl_bucket;
+ qlt_dmem_bctl_t *bctl;
+ qlt_dmem_bucket_t *p;
+ ASSERT((dbuf->db_flags & DB_LU_DATA_BUF) == 0);
+
+ bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
+ p = bctl->bctl_bucket;
mutex_enter(&p->dmem_lock);
bctl->bctl_next = p->dmem_bctl_free_list;
p->dmem_bctl_free_list = bctl;
@@ -363,10 +410,394 @@
void
qlt_dmem_dma_sync(stmf_data_buf_t *dbuf, uint_t sync_type)
{
- qlt_dmem_bctl_t *bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
- qlt_dmem_bucket_t *p = bctl->bctl_bucket;
+ qlt_dmem_bctl_t *bctl;
+ qlt_dma_sgl_t *qsgl;
+ qlt_dmem_bucket_t *p;
+ qlt_dma_handle_t *th;
+ int rv;
+
+ if (dbuf->db_flags & DB_LU_DATA_BUF) {
+ /*
+ * go through ddi handle list
+ */
+ qsgl = (qlt_dma_sgl_t *)dbuf->db_port_private;
+ th = qsgl->handle_list;
+ while (th) {
+ rv = ddi_dma_sync(th->dma_handle,
+ 0, 0, sync_type);
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "ddi_dma_sync FAILED\n");
+ }
+ th = th->next;
+ }
+ } else {
+ bctl = (qlt_dmem_bctl_t *)dbuf->db_port_private;
+ p = bctl->bctl_bucket;
+ (void) ddi_dma_sync(p->dmem_dma_handle, (off_t)
+ (bctl->bctl_dev_addr - p->dmem_dev_addr),
+ dbuf->db_data_size, sync_type);
+ }
+}
+
+/*
+ * A very lite version of ddi_dma_addr_bind_handle()
+ */
+uint64_t
+qlt_ddi_vtop(caddr_t vaddr)
+{
+ uint64_t offset, paddr;
+ pfn_t pfn;
+
+ pfn = hat_getpfnum(kas.a_hat, vaddr);
+ ASSERT(pfn != PFN_INVALID && pfn != PFN_SUSPENDED);
+ offset = ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
+ paddr = mmu_ptob(pfn);
+ return (paddr+offset);
+}
+
+static ddi_dma_attr_t qlt_sgl_dma_attr = {
+ DMA_ATTR_V0, /* dma_attr_version */
+ 0, /* low DMA address range */
+ 0xffffffffffffffff, /* high DMA address range */
+ 0xffffffff, /* DMA counter register */
+ 64, /* DMA address alignment */
+ 0xff, /* DMA burstsizes */
+ 1, /* min effective DMA size */
+ 0xffffffff, /* max DMA xfer size */
+ 0xffffffff, /* segment boundary */
+ QLT_DMA_SG_LIST_LENGTH, /* s/g list length */
+ 1, /* granularity of device */
+ 0 /* DMA transfer flags */
+};
+
+/*
+ * Allocate a qlt_dma_handle container and fill it with a ddi_dma_handle
+ */
+static qlt_dma_handle_t *
+qlt_dma_alloc_handle(qlt_state_t *qlt)
+{
+ ddi_dma_handle_t ddi_handle;
+ qlt_dma_handle_t *qlt_handle;
+ int rv;
+
+ rv = ddi_dma_alloc_handle(qlt->dip, &qlt_sgl_dma_attr,
+ DDI_DMA_SLEEP, 0, &ddi_handle);
+ if (rv != DDI_SUCCESS) {
+ EL(qlt, "ddi_dma_alloc_handle status=%xh\n", rv);
+ return (NULL);
+ }
+ qlt_handle = kmem_zalloc(sizeof (qlt_dma_handle_t), KM_SLEEP);
+ qlt_handle->dma_handle = ddi_handle;
+ return (qlt_handle);
+}
+
+/*
+ * Allocate a list of qlt_dma_handle containers from the free list
+ */
+static qlt_dma_handle_t *
+qlt_dma_alloc_handle_list(qlt_state_t *qlt, int handle_count)
+{
+ qlt_dma_handle_pool_t *pool;
+ qlt_dma_handle_t *tmp_handle, *first_handle, *last_handle;
+ int i;
+
+ /*
+ * Make sure the free list can satisfy the request.
+ * Once the free list is primed, it should satisfy most requests.
+ * XXX Should there be a limit on pool size?
+ */
+ pool = qlt->qlt_dma_handle_pool;
+ mutex_enter(&pool->pool_lock);
+ while (handle_count > pool->num_free) {
+ mutex_exit(&pool->pool_lock);
+ if ((tmp_handle = qlt_dma_alloc_handle(qlt)) == NULL)
+ return (NULL);
+ mutex_enter(&pool->pool_lock);
+ tmp_handle->next = pool->free_list;
+ pool->free_list = tmp_handle;
+ pool->num_free++;
+ pool->num_total++;
+ }
+
+ /*
+ * The free list lock is held and the list is large enough to
+ * satisfy this request. Run down the freelist and snip off
+ * the number of elements needed for this request.
+ */
+ first_handle = pool->free_list;
+ tmp_handle = first_handle;
+ for (i = 0; i < handle_count; i++) {
+ last_handle = tmp_handle;
+ tmp_handle = tmp_handle->next;
+ }
+ pool->free_list = tmp_handle;
+ pool->num_free -= handle_count;
+ mutex_exit(&pool->pool_lock);
+ last_handle->next = NULL; /* sanity */
+ return (first_handle);
+}
+
+/*
+ * Return a list of qlt_dma_handle containers to the free list.
+ */
+static void
+qlt_dma_free_handles(qlt_state_t *qlt, qlt_dma_handle_t *first_handle)
+{
+ qlt_dma_handle_pool_t *pool;
+ qlt_dma_handle_t *tmp_handle, *last_handle;
+ int rv, handle_count;
+
+ /*
+ * Traverse the list and unbind the handles
+ */
+ ASSERT(first_handle);
+ tmp_handle = first_handle;
+ handle_count = 0;
+ while (tmp_handle != NULL) {
+ last_handle = tmp_handle;
+ /*
+ * If the handle is bound, unbind the handle so it can be
+ * reused. It may not be bound if there was a bind failure.
+ */
+ if (tmp_handle->num_cookies != 0) {
+ rv = ddi_dma_unbind_handle(tmp_handle->dma_handle);
+ ASSERT(rv == DDI_SUCCESS);
+ tmp_handle->num_cookies = 0;
+ tmp_handle->num_cookies_fetched = 0;
+ }
+ tmp_handle = tmp_handle->next;
+ handle_count++;
+ }
+ /*
+ * Insert this list into the free list
+ */
+ pool = qlt->qlt_dma_handle_pool;
+ mutex_enter(&pool->pool_lock);
+ last_handle->next = pool->free_list;
+ pool->free_list = first_handle;
+ pool->num_free += handle_count;
+ mutex_exit(&pool->pool_lock);
+}
+
+/*
+ * Return the number of cookies produced by mapping this dbuf.
+ */
+uint16_t
+qlt_get_cookie_count(stmf_data_buf_t *dbuf)
+{
+ qlt_dma_sgl_t *qsgl = dbuf->db_port_private;
+
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+ return (qsgl->cookie_count);
+}
+
+ddi_dma_cookie_t *
+qlt_get_cookie_array(stmf_data_buf_t *dbuf)
+{
+ qlt_dma_sgl_t *qsgl = dbuf->db_port_private;
+
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+
+ if (qsgl->cookie_prefetched)
+ return (&qsgl->cookie[0]);
+ else
+ return (NULL);
+}
- (void) ddi_dma_sync(p->dmem_dma_handle, (off_t)
- (bctl->bctl_dev_addr - p->dmem_dev_addr),
- dbuf->db_data_size, sync_type);
+/*
+ * Wrapper around ddi_dma_nextcookie that hides the ddi_dma_handle usage.
+ */
+void
+qlt_ddi_dma_nextcookie(stmf_data_buf_t *dbuf, ddi_dma_cookie_t *cookiep)
+{
+ qlt_dma_sgl_t *qsgl = dbuf->db_port_private;
+
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+
+ if (qsgl->cookie_prefetched) {
+ ASSERT(qsgl->cookie_next_fetch < qsgl->cookie_count);
+ *cookiep = qsgl->cookie[qsgl->cookie_next_fetch++];
+ } else {
+ qlt_dma_handle_t *fetch;
+ qlt_dma_handle_t *FETCH_DONE = (qlt_dma_handle_t *)0xbad;
+
+ ASSERT(qsgl->handle_list != NULL);
+ ASSERT(qsgl->handle_next_fetch != FETCH_DONE);
+
+ fetch = qsgl->handle_next_fetch;
+ if (fetch->num_cookies_fetched == 0) {
+ *cookiep = fetch->first_cookie;
+ } else {
+ ddi_dma_nextcookie(fetch->dma_handle, cookiep);
+ }
+ if (++fetch->num_cookies_fetched == fetch->num_cookies) {
+ if (fetch->next == NULL)
+ qsgl->handle_next_fetch = FETCH_DONE;
+ else
+ qsgl->handle_next_fetch = fetch->next;
+ }
+ }
}
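(The three accessors above form a small iteration protocol; a minimal consumer sketch, mirroring the loop in qlt_xfer_scsi_data(), with illustrative variable names:)

	ddi_dma_cookie_t cookie, *ckp;
	uint16_t i, n;

	n = qlt_get_cookie_count(dbuf);
	ckp = qlt_get_cookie_array(dbuf);	/* non-NULL only if prefetched */
	for (i = 0; i < n; i++) {
		if (ckp != NULL)
			cookie = ckp[i];
		else
			qlt_ddi_dma_nextcookie(dbuf, &cookie);
		/* program cookie.dmac_laddress and cookie.dmac_size */
	}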
+
+/*
+ * Set this flag to fetch the DDI dma cookies from the handles here and
+ * store them in the port private area of the dbuf. This will allow
+ * faster access to the cookies in qlt_xfer_scsi_data() at the expense of
+ * an extra copy. If the qlt->req_lock is hot, this may help.
+ */
+int qlt_sgl_prefetch = 0;
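(Being a plain module global, the knob can presumably be flipped with the usual /etc/system module-variable syntax, e.g. "set qlt:qlt_sgl_prefetch = 1".)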
+
+/*ARGSUSED*/
+stmf_status_t
+qlt_dma_setup_dbuf(fct_local_port_t *port, stmf_data_buf_t *dbuf,
+ uint32_t flags)
+{
+ qlt_state_t *qlt = port->port_fca_private;
+ qlt_dma_sgl_t *qsgl;
+ struct stmf_sglist_ent *sglp;
+ qlt_dma_handle_t *handle_list, *th;
+ int i, rv;
+ ddi_dma_cookie_t *cookie_p;
+ int cookie_count, numbufs;
+ int prefetch;
+ size_t qsize;
+
+ /*
+ * pseudocode:
+ * get dma handle list from cache - one per sglist entry
+ * foreach sglist entry
+ * bind dma handle to sglist vaddr
+ * allocate space for DMA state to store in db_port_private
+ * fill in port private object
+ * if prefetching
+ * move all dma cookies into db_port_private
+ */
+ dbuf->db_port_private = NULL;
+ numbufs = dbuf->db_sglist_length;
+ handle_list = qlt_dma_alloc_handle_list(qlt, numbufs);
+ if (handle_list == NULL) {
+ EL(qlt, "handle_list==NULL\n");
+ return (STMF_FAILURE);
+ }
+ /*
+ * Loop through sglist and bind each entry to a handle
+ */
+ th = handle_list;
+ sglp = &dbuf->db_sglist[0];
+ cookie_count = 0;
+ for (i = 0; i < numbufs; i++, sglp++) {
+
+ /*
+ * Bind this sgl entry to a DDI dma handle
+ */
+ if ((rv = ddi_dma_addr_bind_handle(
+ th->dma_handle,
+ NULL,
+ (caddr_t)(sglp->seg_addr),
+ (size_t)sglp->seg_length,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING,
+ DDI_DMA_DONTWAIT,
+ NULL,
+ &th->first_cookie,
+ &th->num_cookies)) != DDI_DMA_MAPPED) {
+ cmn_err(CE_NOTE, "ddi_dma_addr_bind_handle %d", rv);
+ qlt_dma_free_handles(qlt, handle_list);
+ return (STMF_FAILURE);
+ }
+
+ /*
+ * Add to total cookie count
+ */
+ cookie_count += th->num_cookies;
+ if (cookie_count > QLT_DMA_SG_LIST_LENGTH) {
+ /*
+ * Request exceeds HBA limit
+ */
+ qlt_dma_free_handles(qlt, handle_list);
+ return (STMF_FAILURE);
+ }
+ /* move to next ddi_dma_handle */
+ th = th->next;
+ }
+
+ /*
+ * Allocate our port private object for DMA mapping state.
+ */
+ prefetch = qlt_sgl_prefetch;
+ qsize = sizeof (qlt_dma_sgl_t);
+ if (prefetch) {
+ /* one extra ddi_dma_cookie allocated for alignment padding */
+ qsize += cookie_count * sizeof (ddi_dma_cookie_t);
+ }
+ qsgl = kmem_alloc(qsize, KM_SLEEP);
+ /*
+ * Fill in the sgl
+ */
+ dbuf->db_port_private = qsgl;
+ qsgl->qsize = qsize;
+ qsgl->handle_count = dbuf->db_sglist_length;
+ qsgl->cookie_prefetched = prefetch;
+ qsgl->cookie_count = cookie_count;
+ qsgl->cookie_next_fetch = 0;
+ qsgl->handle_list = handle_list;
+ qsgl->handle_next_fetch = handle_list;
+ if (prefetch) {
+ /*
+ * traverse handle list and move cookies to db_port_private
+ */
+ th = handle_list;
+ cookie_p = &qsgl->cookie[0];
+ for (i = 0; i < numbufs; i++) {
+ uint_t cc = th->num_cookies;
+
+ *cookie_p++ = th->first_cookie;
+ while (--cc > 0) {
+ ddi_dma_nextcookie(th->dma_handle, cookie_p++);
+ }
+ th->num_cookies_fetched = th->num_cookies;
+ th = th->next;
+ }
+ }
+
+ return (STMF_SUCCESS);
+}
+
+void
+qlt_dma_teardown_dbuf(fct_dbuf_store_t *fds, stmf_data_buf_t *dbuf)
+{
+ qlt_state_t *qlt = fds->fds_fca_private;
+ qlt_dma_sgl_t *qsgl = dbuf->db_port_private;
+
+ ASSERT(qlt);
+ ASSERT(qsgl);
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+
+ /*
+ * unbind and free the dma handles
+ */
+ if (qsgl->handle_list) {
+ /* go through ddi handle list */
+ qlt_dma_free_handles(qlt, qsgl->handle_list);
+ }
+ kmem_free(qsgl, qsgl->qsize);
+}
+
+uint8_t
+qlt_get_iocb_count(uint32_t cookie_count)
+{
+ uint32_t cnt, cont_segs;
+ uint8_t iocb_count;
+
+ iocb_count = 1;
+ cnt = CMD7_2400_DATA_SEGMENTS;
+ cont_segs = CONT_A64_DATA_SEGMENTS;
+
+ if (cookie_count > cnt) {
+ cnt = cookie_count - cnt;
+ iocb_count = (uint8_t)(iocb_count + cnt / cont_segs);
+ if (cnt % cont_segs) {
+ iocb_count++;
+ }
+ }
+ return (iocb_count);
+}
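(Worked example under the limits in qlt_dma.h, i.e. 1 data segment in the main CTIO7 record and 5 per A64 continuation record: cookie_count = 12 gives cnt = 12 - 1 = 11, so iocb_count = 1 + 11/5 = 3, and the 11 % 5 = 1 remainder adds one more, for 4 IOCBs total carrying 1, 5, 5, and 1 segments.)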
--- a/usr/src/uts/common/io/comstar/port/qlt/qlt_dma.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/qlt/qlt_dma.h Wed May 05 10:23:23 2010 -0700
@@ -25,8 +25,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _QLT_DMA_H
@@ -38,13 +37,64 @@
extern "C" {
#endif
+/*
+ * DMA memory object.
+ */
+#define QLT_DMA_SG_LIST_LENGTH 1270
+#define CMD7_2400_DATA_SEGMENTS 1
+#define CONT_A64_DATA_SEGMENTS 5
+
+
+/*
+ * Container for ddi_dma_handle
+ *
+ * These elements are either linked to an active dbuf or in the free list.
+ */
+struct qlt_dma_handle {
+ struct qlt_dma_handle *next;
+ ddi_dma_handle_t dma_handle;
+ ddi_dma_cookie_t first_cookie;
+ uint_t num_cookies;
+ uint_t num_cookies_fetched;
+};
+
+typedef struct qlt_dma_handle qlt_dma_handle_t;
+
+/*
+ * The dbuf private data when using a scatter/gather list.
+ */
+struct qlt_dma_sgl {
+ uint16_t handle_count;
+ uint16_t cookie_count;
+ uint16_t cookie_next_fetch;
+ uint16_t cookie_prefetched;
+ qlt_dma_handle_t *handle_list;
+ qlt_dma_handle_t *handle_next_fetch;
+ size_t qsize;
+ ddi_dma_cookie_t cookie[1];
+};
+
+typedef struct qlt_dma_sgl qlt_dma_sgl_t;
+
+/*
+ * Structure to maintain ddi_dma_handle free pool.
+ */
+struct qlt_dma_handle_pool {
+ kmutex_t pool_lock; /* protects all fields */
+ qlt_dma_handle_t *free_list;
+ int num_free;
+ int num_total;
+};
+
+typedef struct qlt_dma_handle_pool qlt_dma_handle_pool_t;
+
struct qlt_dmem_bucket;
typedef struct qlt_dmem_bctl {
struct qlt_dmem_bucket *bctl_bucket;
struct qlt_dmem_bctl *bctl_next;
uint64_t bctl_dev_addr;
- uint8_t bctl_task_ndx;
+ uint8_t bctl_task_ndx; /* not used */
stmf_data_buf_t *bctl_buf;
} qlt_dmem_bctl_t;
@@ -63,13 +113,27 @@
fct_status_t qlt_dmem_init(qlt_state_t *qlt);
void qlt_dmem_fini(qlt_state_t *qlt);
+void qlt_dma_handle_pool_init(qlt_state_t *qlt);
+void qlt_dma_handle_pool_fini(qlt_state_t *qlt);
stmf_data_buf_t *qlt_dmem_alloc(fct_local_port_t *port, uint32_t size,
uint32_t *pminsize, uint32_t flags);
stmf_data_buf_t *qlt_i_dmem_alloc(qlt_state_t *qlt, uint32_t size,
uint32_t *pminsize, uint32_t flags);
void qlt_dmem_free(fct_dbuf_store_t *fds, stmf_data_buf_t *dbuf);
void qlt_i_dmem_free(qlt_state_t *qlt, stmf_data_buf_t *dbuf);
+stmf_status_t qlt_dma_setup_dbuf(fct_local_port_t *port,
+ stmf_data_buf_t *dbuf, uint32_t flags);
+void qlt_dma_teardown_dbuf(fct_dbuf_store_t *fds, stmf_data_buf_t *dbuf);
void qlt_dmem_dma_sync(stmf_data_buf_t *dbuf, uint_t sync_type);
+uint8_t qlt_get_iocb_count(uint32_t cookie_cnt);
+uint64_t qlt_ddi_vtop(caddr_t vaddr);
+/*
+ * XXX move the following into the fct layer
+ */
+uint16_t qlt_get_cookie_count(stmf_data_buf_t *dbuf);
+void qlt_ddi_dma_nextcookie(stmf_data_buf_t *dbuf, ddi_dma_cookie_t *cookie_p);
+ddi_dma_cookie_t *qlt_get_cookie_array(stmf_data_buf_t *dbuf);
+
#ifdef __cplusplus
}
--- a/usr/src/uts/common/io/comstar/port/qlt/qlt_open.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/port/qlt/qlt_open.h Wed May 05 10:23:23 2010 -0700
@@ -45,7 +45,7 @@
#endif
#ifndef QLT_VERSION
-#define QLT_VERSION "20091202-1.04"
+#define QLT_VERSION "20100505-1.05"
#endif
#ifndef QLT_NAME
--- a/usr/src/uts/common/io/comstar/stmf/stmf.c Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/io/comstar/stmf/stmf.c Wed May 05 10:23:23 2010 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/conf.h>
@@ -2157,7 +2157,10 @@
stmf_size = stmf_sizes[struct_id].shared +
stmf_sizes[struct_id].fw_private + additional_size;
- sh = (__stmf_t *)kmem_zalloc(stmf_size, kmem_flag);
+ if (flags & AF_DONTZERO)
+ sh = (__stmf_t *)kmem_alloc(stmf_size, kmem_flag);
+ else
+ sh = (__stmf_t *)kmem_zalloc(stmf_size, kmem_flag);
if (sh == NULL)
return (NULL);
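(AF_DONTZERO skips the zeroing, so it is only safe when the caller initializes every field of the allocated object; a hypothetical call site, with sgl_extra illustrative:)

	/* no zeroing was done: the caller must fill in every field */
	dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sgl_extra, AF_DONTZERO);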
@@ -4242,6 +4245,54 @@
return (NULL);
}
+stmf_status_t
+stmf_setup_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t flags)
+{
+ stmf_i_scsi_task_t *itask =
+ (stmf_i_scsi_task_t *)task->task_stmf_private;
+ stmf_local_port_t *lport = task->task_lport;
+ uint8_t ndx;
+ stmf_status_t ret;
+
+ ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
+ ASSERT(lport->lport_ds->ds_setup_dbuf != NULL);
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+
+ if ((task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) == 0)
+ return (STMF_FAILURE);
+ if (lport->lport_ds->ds_setup_dbuf == NULL)
+ return (STMF_FAILURE);
+
+ ndx = stmf_first_zero[itask->itask_allocated_buf_map];
+ if (ndx == 0xff)
+ return (STMF_FAILURE);
+ ret = lport->lport_ds->ds_setup_dbuf(task, dbuf, flags);
+ if (ret == STMF_FAILURE)
+ return (STMF_FAILURE);
+ itask->itask_dbufs[ndx] = dbuf;
+ task->task_cur_nbufs++;
+ itask->itask_allocated_buf_map |= (1 << ndx);
+ dbuf->db_handle = ndx;
+
+ return (STMF_SUCCESS);
+}
+
+void
+stmf_teardown_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf)
+{
+ stmf_i_scsi_task_t *itask =
+ (stmf_i_scsi_task_t *)task->task_stmf_private;
+ stmf_local_port_t *lport = task->task_lport;
+
+ ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
+ ASSERT(lport->lport_ds->ds_teardown_dbuf != NULL);
+ ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
+
+ itask->itask_allocated_buf_map &= ~(1 << dbuf->db_handle);
+ task->task_cur_nbufs--;
+ lport->lport_ds->ds_teardown_dbuf(lport->lport_ds, dbuf);
+}
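(A condensed sketch of the intended LU-provider flow around these two entry points; nents is illustrative and error handling is abbreviated:)

	stmf_data_buf_t *dbuf;

	dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF,
	    (nents - 1) * sizeof (stmf_sglist_ent_t), 0);
	dbuf->db_flags = DB_LU_DATA_BUF;
	dbuf->db_sglist_length = nents;
	/*
	 * Also set db_data_size and db_relative_offset, and point each
	 * db_sglist[] entry at the backing-store memory.
	 */
	if (stmf_setup_dbuf(task, dbuf, 0) != STMF_SUCCESS) {
		stmf_free(dbuf);
		/* fall back to a port-allocated dbuf */
	}

	/* later, from the LU's lu_dbuf_free entry point: */
	stmf_teardown_dbuf(task, dbuf);
	stmf_free(dbuf);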
+
void
stmf_free_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf)
{
@@ -4276,7 +4327,6 @@
stmf_i_scsi_task_t *itask;
stmf_i_scsi_task_t **ppitask;
scsi_task_t *task;
- uint64_t *p;
uint8_t *l;
stmf_lun_map_ent_t *lun_map_ent;
uint16_t cdb_length;
@@ -4331,10 +4381,20 @@
} while (0);
if (!new_task) {
+ /*
+ * Save the task_cdb pointer and zero the per-command fields.
+ * The task selection process above guarantees that
+ * task_cdb_length is large enough.
+ */
+ uint8_t *save_cdb;
+ uintptr_t t_start, t_end;
+
task = itask->itask_task;
- task->task_timeout = 0;
- p = (uint64_t *)&task->task_flags;
- *p++ = 0; *p++ = 0; p++; p++; *p++ = 0; *p++ = 0; *p = 0;
+ save_cdb = task->task_cdb; /* save */
+ t_start = (uintptr_t)&task->task_flags;
+ t_end = (uintptr_t)&task->task_extended_cmd;
+ bzero((void *)t_start, (size_t)(t_end - t_start));
+ task->task_cdb = save_cdb; /* restore */
itask->itask_ncmds = 0;
} else {
task = (scsi_task_t *)stmf_alloc(STMF_STRUCT_SCSI_TASK,
@@ -4596,25 +4656,38 @@
int i;
uint8_t map;
- if ((map = itask->itask_allocated_buf_map) != 0) {
- for (i = 0; i < 4; i++) {
- if (map & 1) {
- stmf_data_buf_t *dbuf;
-
- dbuf = itask->itask_dbufs[i];
- if (dbuf->db_lu_private) {
- dbuf->db_lu_private = NULL;
- }
- if (dbuf->db_xfer_start_timestamp != NULL) {
- stmf_lport_xfer_done(itask, dbuf);
- }
+ if ((map = itask->itask_allocated_buf_map) == 0)
+ return;
+ for (i = 0; i < 4; i++) {
+ if (map & 1) {
+ stmf_data_buf_t *dbuf;
+
+ dbuf = itask->itask_dbufs[i];
+ if (dbuf->db_xfer_start_timestamp) {
+ stmf_lport_xfer_done(itask, dbuf);
+ }
+ if (dbuf->db_flags & DB_LU_DATA_BUF) {
+ /*
+ * The LU owns this buffer; its lu_dbuf_free
+ * entry point must free it before returning.
+ */
+ scsi_task_t *task = itask->itask_task;
+ stmf_lu_t *lu = task->task_lu;
+
+ lu->lu_dbuf_free(task, dbuf);
+ ASSERT(((itask->itask_allocated_buf_map>>i)
+ & 1) == 0); /* must be gone */
+ } else {
+ ASSERT(dbuf->db_lu_private == NULL);
+ dbuf->db_lu_private = NULL;
lport->lport_ds->ds_free_data_buf(
lport->lport_ds, dbuf);
}
- map >>= 1;
- }
- itask->itask_allocated_buf_map = 0;
- }
+ }
+ map >>= 1;
+ }
+ itask->itask_allocated_buf_map = 0;
}
void
--- a/usr/src/uts/common/sys/fct.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/sys/fct.h Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _FCT_H
#define _FCT_H
@@ -195,6 +194,13 @@
uint32_t size, uint32_t *pminsize, uint32_t flags);
void (*fds_free_data_buf)(struct fct_dbuf_store *fds,
stmf_data_buf_t *dbuf);
+ stmf_status_t (*fds_setup_dbuf)(struct fct_local_port *port,
+ stmf_data_buf_t *dbuf, uint32_t flags);
+ void (*fds_teardown_dbuf)(struct fct_dbuf_store *fds,
+ stmf_data_buf_t *dbuf);
+
+ uint32_t fds_max_sgl_xfer_len;
+ uint32_t fds_copy_threshold;
} fct_dbuf_store_t;
#define FCT_FCA_MODREV_1 1
--- a/usr/src/uts/common/sys/lpif.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/sys/lpif.h Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LPIF_H
#define _LPIF_H
@@ -74,6 +73,8 @@
int eventid, void *arg, uint32_t flags);
void *lu_proxy_reg_arg;
uint32_t lu_proxy_reg_arg_len;
+ void (*lu_dbuf_free)(struct scsi_task *task,
+ struct stmf_data_buf *dbuf);
} stmf_lu_t;
/*
--- a/usr/src/uts/common/sys/portif.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/sys/portif.h Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _PORTIF_H
#define _PORTIF_H
@@ -41,8 +40,15 @@
stmf_data_buf_t *(*ds_alloc_data_buf)(struct scsi_task *task,
uint32_t size, uint32_t *pminsize, uint32_t flags);
- void (*ds_free_data_buf)(
- struct stmf_dbuf_store *ds, stmf_data_buf_t *dbuf);
+
+ void (*ds_free_data_buf)(
+ struct stmf_dbuf_store *ds, stmf_data_buf_t *dbuf);
+
+ stmf_status_t (*ds_setup_dbuf)(struct scsi_task *task,
+ stmf_data_buf_t *dbuf, uint32_t flags);
+
+ void (*ds_teardown_dbuf)(
+ struct stmf_dbuf_store *ds, stmf_data_buf_t *dbuf);
} stmf_dbuf_store_t;
#define PORTIF_REV_1 0x00010000
--- a/usr/src/uts/common/sys/stmf.h Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/common/sys/stmf.h Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _STMF_H
#define _STMF_H
@@ -62,6 +61,22 @@
#define COMPANY_ID_NONE 0xFFFFFFFF
#define COMPANY_ID_SUN 0x00144F
+/*
+ * The scatter/gather list buffer format is used in two different
+ * contexts within stmf:
+ * 1) supplied by the port provider, which the LU provider uses to
+ * exchange data with the backing store.
+ * 2) supplied by the LU provider, which the port provider uses to
+ * exchange data with the host initiator.
+ * The second format is optionally supported by the port provider, as
+ * indicated by the task flags (TASK_AF_ACCEPT_LU_DBUF).
+ */
+
+typedef struct stmf_sglist_ent {
+ uint32_t seg_length;
+ uint8_t *seg_addr;
+} stmf_sglist_ent_t;
+
typedef struct stmf_data_buf {
void *db_stmf_private;
void *db_port_private;
@@ -74,10 +89,7 @@
stmf_status_t db_xfer_status;
uint8_t db_handle; /* To track parallel buffers */
hrtime_t db_xfer_start_timestamp;
- struct stmf_sglist_ent {
- uint32_t seg_length;
- uint8_t *seg_addr;
- } db_sglist[1];
+ stmf_sglist_ent_t db_sglist[1]; /* PP scatter/gather list */
} stmf_data_buf_t;
/*
@@ -89,6 +101,7 @@
#define DB_STATUS_GOOD_SENT 0x0008
#define DB_DONT_CACHE 0x0010
#define DB_DONT_REUSE 0x0020
+#define DB_LU_DATA_BUF 0x0040
typedef struct scsi_task {
void *task_stmf_private;
@@ -119,6 +132,10 @@
/* Fields to manage data phase */
uint32_t task_cmd_xfer_length; /* xfer len based on CDB */
uint32_t task_nbytes_transferred;
+ uint32_t task_max_xfer_len; /* largest xfer allowed */
+ uint32_t task_1st_xfer_len; /* 1st xfer hint */
+ uint32_t task_copy_threshold; /* copy reduction threshold */
+
/* Status Phase */
stmf_status_t task_completion_status;
@@ -180,6 +197,10 @@
#define TASK_AF_ENABLE_COMP_CONF 0x01
#define TASK_AF_PORT_LOAD_HIGH 0x02
#define TASK_AF_NO_EXPECTED_XFER_LENGTH 0x04
+/*
+ * PP sets this flag if it can process dbufs created by the LU.
+ */
+#define TASK_AF_ACCEPT_LU_DBUF 0x08
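(A matching LU-side sketch of the decision, with xfer_size illustrative:)

	if ((task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
	    xfer_size >= task->task_copy_threshold) {
		/* build a DB_LU_DATA_BUF dbuf over the backing store */
	}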
/*
* scsi_task_t extension identifiers
@@ -208,9 +229,10 @@
* struct allocation flags
*/
#define AF_FORCE_NOSLEEP 0x0001
+#define AF_DONTZERO 0x0002
typedef struct stmf_state_change_info {
- uint64_t st_rflags; /* Reason behin this change */
+ uint64_t st_rflags; /* Reason behind this change */
char *st_additional_info;
} stmf_state_change_info_t;
@@ -343,6 +365,9 @@
stmf_data_buf_t *stmf_alloc_dbuf(scsi_task_t *task, uint32_t size,
uint32_t *pminsize, uint32_t flags);
void stmf_free_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf);
+stmf_status_t stmf_setup_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf,
+ uint32_t flags);
+void stmf_teardown_dbuf(scsi_task_t *task, stmf_data_buf_t *dbuf);
stmf_status_t stmf_xfer_data(scsi_task_t *task, stmf_data_buf_t *dbuf,
uint32_t ioflags);
stmf_status_t stmf_send_scsi_status(scsi_task_t *task, uint32_t ioflags);
--- a/usr/src/uts/intel/stmf_sbd/Makefile Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/intel/stmf_sbd/Makefile Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
#
# This makefile drives the production of the stmf_sbd driver for
# COMSTAR.
@@ -57,7 +56,7 @@
# Overrides and depends_on
#
MODSTUBS_DIR = $(OBJS_DIR)
-LDFLAGS += -dy -Ndrv/stmf
+LDFLAGS += -dy -Ndrv/stmf -Nfs/zfs
INC_PATH += -I$(UTSBASE)/common/fs/zfs
INC_PATH += -I$(UTSBASE)/common/io/comstar/lu/stmf_sbd
--- a/usr/src/uts/sparc/stmf_sbd/Makefile Wed May 05 10:34:37 2010 -0400
+++ b/usr/src/uts/sparc/stmf_sbd/Makefile Wed May 05 10:23:23 2010 -0700
@@ -19,8 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
#
# This makefile drives the production of the stmf_sbd driver for
# COMSTAR.
@@ -57,7 +56,7 @@
# Overrides and depends_on
#
MODSTUBS_DIR = $(OBJS_DIR)
-LDFLAGS += -dy -Ndrv/stmf
+LDFLAGS += -dy -Ndrv/stmf -Nfs/zfs
INC_PATH += -I$(UTSBASE)/common/fs/zfs
INC_PATH += -I$(UTSBASE)/common/io/comstar/lu/stmf_sbd