--- a/usr/src/uts/common/fs/zfs/spa.c Mon Nov 23 16:18:43 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/spa.c Mon Nov 23 15:29:44 2009 -0800
@@ -62,24 +62,28 @@
#include <sys/zfs_ioctl.h>
#ifdef _KERNEL
+#include <sys/bootprops.h>
+#include <sys/callb.h>
+#include <sys/cpupart.h>
+#include <sys/pool.h>
+#include <sys/sysdc.h>
#include <sys/zone.h>
-#include <sys/bootprops.h>
#endif /* _KERNEL */
#include "zfs_prop.h"
#include "zfs_comutil.h"
-enum zti_modes {
+typedef enum zti_modes {
zti_mode_fixed, /* value is # of threads (min 1) */
zti_mode_online_percent, /* value is % of online CPUs */
- zti_mode_tune, /* fill from zio_taskq_tune_* */
+ zti_mode_batch, /* cpu-intensive; value is ignored */
zti_mode_null, /* don't create a taskq */
zti_nmodes
-};
+} zti_modes_t;
#define ZTI_FIX(n) { zti_mode_fixed, (n) }
#define ZTI_PCT(n) { zti_mode_online_percent, (n) }
-#define ZTI_TUNE { zti_mode_tune, 0 }
+#define ZTI_BATCH { zti_mode_batch, 0 }
#define ZTI_NULL { zti_mode_null, 0 }
#define ZTI_ONE ZTI_FIX(1)
@@ -90,7 +94,7 @@
} zio_taskq_info_t;
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
- "issue", "issue_high", "intr", "intr_high"
+ "issue", "issue_high", "intr", "intr_high"
};
/*
@@ -100,19 +104,29 @@
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_FIX(8), ZTI_NULL, ZTI_TUNE, ZTI_NULL },
- { ZTI_TUNE, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) },
+ { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL },
+ { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
-enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
-uint_t zio_taskq_tune_value = 80; /* #threads = 80% of # online CPUs */
-
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
+uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
+id_t zio_taskq_psrset_bind = PS_NONE;
+boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
+uint_t zio_taskq_basedc = 80; /* base duty cycle */
+
+boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
+
+/*
+ * This (illegal) pool name is used when temporarily importing a spa_t in order
+ * to get the vdev stats associated with the imported devices.
+ */
+#define TRYIMPORT_NAME "$import"
+
/*
* ==========================================================================
* SPA properties routines
@@ -584,6 +598,139 @@
offsetof(spa_error_entry_t, se_avl));
}
+static taskq_t *
+spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
+ uint_t value)
+{
+ uint_t flags = TASKQ_PREPOPULATE;
+ boolean_t batch = B_FALSE;
+
+ switch (mode) {
+ case zti_mode_null:
+ return (NULL); /* no taskq needed */
+
+ case zti_mode_fixed:
+ ASSERT3U(value, >=, 1);
+ value = MAX(value, 1);
+ break;
+
+ case zti_mode_batch:
+ batch = B_TRUE;
+ flags |= TASKQ_THREADS_CPU_PCT;
+ value = zio_taskq_batch_pct;
+ break;
+
+ case zti_mode_online_percent:
+ flags |= TASKQ_THREADS_CPU_PCT;
+ break;
+
+ default:
+ panic("unrecognized mode for %s taskq (%u:%u) in "
+ "spa_activate()",
+ name, mode, value);
+ break;
+ }
+
+ if (zio_taskq_sysdc && spa->spa_proc != &p0) {
+ if (batch)
+ flags |= TASKQ_DC_BATCH;
+
+ return (taskq_create_sysdc(name, value, 50, INT_MAX,
+ spa->spa_proc, zio_taskq_basedc, flags));
+ }
+ return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
+ spa->spa_proc, flags));
+}
+
+static void
+spa_create_zio_taskqs(spa_t *spa)
+{
+ for (int t = 0; t < ZIO_TYPES; t++) {
+ for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
+ const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
+ enum zti_modes mode = ztip->zti_mode;
+ uint_t value = ztip->zti_value;
+ char name[32];
+
+ (void) snprintf(name, sizeof (name),
+ "%s_%s", zio_type_name[t], zio_taskq_types[q]);
+
+ spa->spa_zio_taskq[t][q] =
+ spa_taskq_create(spa, name, mode, value);
+ }
+ }
+}
+
+#ifdef _KERNEL
+static void
+spa_thread(void *arg)
+{
+ callb_cpr_t cprinfo;
+
+ spa_t *spa = arg;
+ user_t *pu = PTOU(curproc);
+
+ CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
+ spa->spa_name);
+
+ ASSERT(curproc != &p0);
+ (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
+ "zpool-%s", spa->spa_name);
+ (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));
+
+ /* bind this thread to the requested psrset */
+ if (zio_taskq_psrset_bind != PS_NONE) {
+ pool_lock();
+ mutex_enter(&cpu_lock);
+ mutex_enter(&pidlock);
+ mutex_enter(&curproc->p_lock);
+
+ if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
+ 0, NULL, NULL) == 0) {
+ curthread->t_bind_pset = zio_taskq_psrset_bind;
+ } else {
+ cmn_err(CE_WARN,
+ "Couldn't bind process for zfs pool \"%s\" to "
+ "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
+ }
+
+ mutex_exit(&curproc->p_lock);
+ mutex_exit(&pidlock);
+ mutex_exit(&cpu_lock);
+ pool_unlock();
+ }
+
+ if (zio_taskq_sysdc) {
+ sysdc_thread_enter(curthread, 100, 0);
+ }
+
+ spa->spa_proc = curproc;
+ spa->spa_did = curthread->t_did;
+
+ spa_create_zio_taskqs(spa);
+
+ mutex_enter(&spa->spa_proc_lock);
+ ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);
+
+ spa->spa_proc_state = SPA_PROC_ACTIVE;
+ cv_broadcast(&spa->spa_proc_cv);
+
+ CALLB_CPR_SAFE_BEGIN(&cprinfo);
+ while (spa->spa_proc_state == SPA_PROC_ACTIVE)
+ cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
+ CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);
+
+ ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
+ spa->spa_proc_state = SPA_PROC_GONE;
+ spa->spa_proc = &p0;
+ cv_broadcast(&spa->spa_proc_cv);
+ CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */
+
+ mutex_enter(&curproc->p_lock);
+ lwp_exit();
+}
+#endif
+
/*
* Activate an uninitialized pool.
*/
@@ -598,53 +745,38 @@
spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
- for (int t = 0; t < ZIO_TYPES; t++) {
- for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
- const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
- enum zti_modes mode = ztip->zti_mode;
- uint_t value = ztip->zti_value;
- char name[32];
-
- (void) snprintf(name, sizeof (name),
- "%s_%s", zio_type_name[t], zio_taskq_types[q]);
-
- if (mode == zti_mode_tune) {
- mode = zio_taskq_tune_mode;
- value = zio_taskq_tune_value;
- if (mode == zti_mode_tune)
- mode = zti_mode_online_percent;
+ /* Try to create a covering process */
+ mutex_enter(&spa->spa_proc_lock);
+ ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
+ ASSERT(spa->spa_proc == &p0);
+ spa->spa_did = 0;
+
+ /* Only create a process if we're going to be around a while. */
+ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
+ if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
+ NULL, 0) == 0) {
+ spa->spa_proc_state = SPA_PROC_CREATED;
+ while (spa->spa_proc_state == SPA_PROC_CREATED) {
+ cv_wait(&spa->spa_proc_cv,
+ &spa->spa_proc_lock);
}
-
- switch (mode) {
- case zti_mode_fixed:
- ASSERT3U(value, >=, 1);
- value = MAX(value, 1);
-
- spa->spa_zio_taskq[t][q] = taskq_create(name,
- value, maxclsyspri, 50, INT_MAX,
- TASKQ_PREPOPULATE);
- break;
-
- case zti_mode_online_percent:
- spa->spa_zio_taskq[t][q] = taskq_create(name,
- value, maxclsyspri, 50, INT_MAX,
- TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
- break;
-
- case zti_mode_null:
- spa->spa_zio_taskq[t][q] = NULL;
- break;
-
- case zti_mode_tune:
- default:
- panic("unrecognized mode for "
- "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) "
- "in spa_activate()",
- t, q, mode, value);
- break;
- }
+ ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
+ ASSERT(spa->spa_proc != &p0);
+ ASSERT(spa->spa_did != 0);
+ } else {
+#ifdef _KERNEL
+ cmn_err(CE_WARN,
+ "Couldn't create process for zfs pool \"%s\"\n",
+ spa->spa_name);
+#endif
}
}
+ mutex_exit(&spa->spa_proc_lock);
+
+ /* If we didn't create a process, we need to create our taskqs. */
+ if (spa->spa_proc == &p0) {
+ spa_create_zio_taskqs(spa);
+ }
list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_config_dirty_node));
@@ -703,6 +835,31 @@
avl_destroy(&spa->spa_errlist_last);
spa->spa_state = POOL_STATE_UNINITIALIZED;
+
+ mutex_enter(&spa->spa_proc_lock);
+ if (spa->spa_proc_state != SPA_PROC_NONE) {
+ ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
+ spa->spa_proc_state = SPA_PROC_DEACTIVATE;
+ cv_broadcast(&spa->spa_proc_cv);
+ while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
+ ASSERT(spa->spa_proc != &p0);
+ cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
+ }
+ ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
+ spa->spa_proc_state = SPA_PROC_NONE;
+ }
+ ASSERT(spa->spa_proc == &p0);
+ mutex_exit(&spa->spa_proc_lock);
+
+ /*
+ * We want to make sure spa_thread() has actually exited the ZFS
+ * module, so that the module can't be unloaded out from underneath
+ * it.
+ */
+ if (spa->spa_did != 0) {
+ thread_join(spa->spa_did);
+ spa->spa_did = 0;
+ }
}
/*
@@ -2999,13 +3156,6 @@
return (0);
}
-
-/*
- * This (illegal) pool name is used when temporarily importing a spa_t in order
- * to get the vdev stats associated with the imported devices.
- */
-#define TRYIMPORT_NAME "$import"
-
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{