--- a/usr/src/cmd/ztest/ztest.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/cmd/ztest/ztest.c Thu Aug 30 05:13:49 2012 -0700
@@ -5835,6 +5835,8 @@
(void) setvbuf(stdout, NULL, _IOLBF, 0);
+ dprintf_setup(&argc, argv);
+
if (!ischild) {
process_options(argc, argv);
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h Thu Aug 30 05:13:49 2012 -0700
@@ -61,6 +61,7 @@
#include <atomic.h>
#include <dirent.h>
#include <time.h>
+#include <procfs.h>
#include <libsysevent.h>
#include <sys/note.h>
#include <sys/types.h>
--- a/usr/src/uts/common/fs/zfs/arc.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c Thu Aug 30 05:13:49 2012 -0700
@@ -135,6 +135,12 @@
#include <sys/kstat.h>
#include <zfs_fletcher.h>
+#ifndef _KERNEL
+/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
+boolean_t arc_watch = B_FALSE; /* B_TRUE only once arc_procfd is open */
+int arc_procfd; /* fd for /proc/self/ctl; -1 if the open failed */
+#endif
+
static kmutex_t arc_reclaim_thr_lock;
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
static uint8_t arc_thread_exit;
@@ -474,6 +480,7 @@
static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
static int arc_evict_needed(arc_buf_contents_t type);
static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes);
+static void arc_buf_watch(arc_buf_t *buf);
static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
@@ -949,6 +956,50 @@
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
buf->b_hdr->b_freeze_cksum);
mutex_exit(&buf->b_hdr->b_freeze_lock);
+ arc_buf_watch(buf);
+}
+
+#ifndef _KERNEL
+typedef struct procctl {
+ long cmd; /* control code written to arc_procfd, e.g. PCWATCH */
+ prwatch_t prwatch; /* operand: pr_vaddr, pr_size, pr_wflags */
+} procctl_t;
+#endif
+
+/* ARGSUSED */
+static void
+arc_buf_unwatch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+ if (arc_watch) { /* otherwise a no-op, as in _KERNEL builds */
+ int result;
+ procctl_t ctl;
+ ctl.cmd = PCWATCH; /* same control code as arc_buf_watch() */
+ ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+ ctl.prwatch.pr_size = 0;
+ ctl.prwatch.pr_wflags = 0; /* no WA_* flags: presumably clears */
+ result = write(arc_procfd, &ctl, sizeof (ctl));
+ ASSERT3U(result, ==, sizeof (ctl)); /* expect a full write */
+ }
+#endif
+}
+
+/* ARGSUSED */
+static void
+arc_buf_watch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+ if (arc_watch) { /* otherwise a no-op, as in _KERNEL builds */
+ int result;
+ procctl_t ctl;
+ ctl.cmd = PCWATCH;
+ ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+ ctl.prwatch.pr_size = buf->b_hdr->b_size; /* whole buffer */
+ ctl.prwatch.pr_wflags = WA_WRITE; /* watch write access */
+ result = write(arc_procfd, &ctl, sizeof (ctl));
+ ASSERT3U(result, ==, sizeof (ctl)); /* expect a full write */
+ }
+#endif
}
void
@@ -975,6 +1026,8 @@
}
mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+ arc_buf_unwatch(buf);
}
void
@@ -992,6 +1045,7 @@
buf->b_hdr->b_state == arc_anon);
arc_cksum_compute(buf, B_FALSE);
mutex_exit(hash_lock);
+
}
static void
@@ -1348,21 +1402,22 @@
* the buffer is placed on l2arc_free_on_write to be freed later.
*/
static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
- void *data, size_t size)
+arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
if (HDR_L2_WRITING(hdr)) {
l2arc_data_free_t *df;
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
- df->l2df_data = data;
- df->l2df_size = size;
+ df->l2df_data = buf->b_data; /* queued; free_func runs later */
+ df->l2df_size = hdr->b_size; /* NOTE(review): old 'size' arg assumed == b_size */
df->l2df_func = free_func;
mutex_enter(&l2arc_free_on_write_mtx);
list_insert_head(l2arc_free_on_write, df);
mutex_exit(&l2arc_free_on_write_mtx);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
} else {
- free_func(data, size);
+ free_func(buf->b_data, hdr->b_size); /* free immediately */
}
}
@@ -1378,16 +1433,15 @@
arc_buf_contents_t type = buf->b_hdr->b_type;
arc_cksum_verify(buf);
+ arc_buf_unwatch(buf);
if (!recycle) {
if (type == ARC_BUFC_METADATA) {
- arc_buf_data_free(buf->b_hdr, zio_buf_free,
- buf->b_data, size);
+ arc_buf_data_free(buf, zio_buf_free);
arc_space_return(size, ARC_SPACE_DATA);
} else {
ASSERT(type == ARC_BUFC_DATA);
- arc_buf_data_free(buf->b_hdr,
- zio_data_buf_free, buf->b_data, size);
+ arc_buf_data_free(buf, zio_data_buf_free);
ARCSTAT_INCR(arcstat_data_size, -size);
atomic_add_64(&arc_size, -size);
}
@@ -2556,6 +2610,7 @@
}
arc_cksum_compute(buf, B_FALSE);
+ arc_buf_watch(buf);
if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
/*
@@ -3113,6 +3168,7 @@
}
hdr->b_datacnt -= 1;
arc_cksum_verify(buf);
+ arc_buf_unwatch(buf);
mutex_exit(hash_lock);
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Thu Aug 30 05:13:49 2012 -0700
@@ -2302,7 +2302,6 @@
}
}
}
-
}
void
--- a/usr/src/uts/common/fs/zfs/dsl_synctask.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c Thu Aug 30 05:13:49 2012 -0700
@@ -230,12 +230,7 @@
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
- dsl_sync_task_group_t *dstg;
-
- if (!spa_writeable(dp->dp_spa))
- return;
-
- dstg = dsl_sync_task_group_create(dp);
+ dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
dsl_sync_task_group_nowait(dstg, tx);
--- a/usr/src/uts/common/fs/zfs/spa_history.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_history.c Thu Aug 30 05:13:49 2012 -0700
@@ -303,7 +303,7 @@
dmu_tx_t *tx;
nvlist_t *nvarg;
- if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY)
+ if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
return (EINVAL);
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
@@ -439,8 +439,9 @@
/*
* If this is part of creating a pool, not everything is
* initialized yet, so don't bother logging the internal events.
+ * Likewise if the pool is not writeable.
*/
- if (tx->tx_txg == TXG_INITIAL) {
+ if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
fnvlist_free(nvl);
return;
}
--- a/usr/src/uts/common/fs/zfs/spa_misc.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c Thu Aug 30 05:13:49 2012 -0700
@@ -1600,6 +1600,18 @@
spa_mode_global = mode;
+#ifndef _KERNEL
+ if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+ arc_procfd = open("/proc/self/ctl", O_WRONLY);
+ if (arc_procfd == -1) { /* perror() appends ": <error>" */
+ perror("could not enable watchpoints: "
+ "opening /proc/self/ctl failed");
+ } else {
+ arc_watch = B_TRUE; /* arc.c may now use arc_procfd */
+ }
+ }
+#endif
+
refcount_init();
unique_init();
zio_init();
--- a/usr/src/uts/common/fs/zfs/sys/arc.h Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h Thu Aug 30 05:13:49 2012 -0700
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ARC_H
@@ -135,6 +136,11 @@
void l2arc_start(void);
void l2arc_stop(void);
+#ifndef _KERNEL
+extern boolean_t arc_watch;
+extern int arc_procfd;
+#endif
+
#ifdef __cplusplus
}
#endif
--- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h Thu Aug 30 05:13:49 2012 -0700
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_DEBUG_H
@@ -75,6 +76,10 @@
extern void zfs_dbgmsg_fini(void);
extern void zfs_dbgmsg(const char *fmt, ...);
+#ifndef _KERNEL
+extern int dprintf_find_string(const char *string);
+#endif
+
#ifdef __cplusplus
}
#endif
--- a/usr/src/uts/common/fs/zfs/zio.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c Thu Aug 30 05:13:49 2012 -0700
@@ -125,11 +125,21 @@
while (p2 & (p2 - 1))
p2 &= p2 - 1;
+#ifndef _KERNEL
+ /*
+ * If we are using watchpoints, put each buffer on its own page,
+ * to eliminate the performance overhead of trapping to the
+ * kernel when modifying a non-watched buffer that shares the
+ * page with a watched buffer.
+ */
+ if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
+ continue;
+#endif
if (size <= 4 * SPA_MINBLOCKSIZE) {
align = SPA_MINBLOCKSIZE;
- } else if (P2PHASE(size, PAGESIZE) == 0) {
+ } else if (IS_P2ALIGNED(size, PAGESIZE)) {
align = PAGESIZE;
- } else if (P2PHASE(size, p2 >> 2) == 0) {
+ } else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = p2 >> 2;
}
--- a/usr/src/uts/common/io/devpoll.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/io/devpoll.c Thu Aug 30 05:13:49 2012 -0700
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
#include <sys/types.h>
#include <sys/devops.h>
@@ -695,9 +697,15 @@
minor_t minor;
dp_entry_t *dpep;
pollcache_t *pcp;
+ hrtime_t now;
int error = 0;
STRUCT_DECL(dvpoll, dvpoll);
+ if (cmd == DP_POLL) {
+ /* do this now, before we sleep on DP_WRITER_PRESENT */
+ now = gethrtime();
+ }
+
minor = getminor(dev);
mutex_enter(&devpoll_lock);
ASSERT(minor < dptblsize);
@@ -725,9 +733,7 @@
pollstate_t *ps;
nfds_t nfds;
int fdcnt = 0;
- int time_out;
- clock_t *deltap = NULL;
- clock_t delta;
+ hrtime_t deadline = 0;
STRUCT_INIT(dvpoll, mode);
error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
@@ -737,18 +743,16 @@
return (EFAULT);
}
- time_out = STRUCT_FGET(dvpoll, dp_timeout);
- if (time_out > 0) {
+ deadline = STRUCT_FGET(dvpoll, dp_timeout);
+ if (deadline > 0) {
/*
- * cv_relwaituntil_sig operates at the tick
- * granularity, which by default is 10 ms.
- * This results in rounding user specified
- * timeouts up but prevents the system
- * from being flooded with small high
- * resolution timers.
+ * Convert the deadline from relative milliseconds
+ * to absolute nanoseconds. The caller must wait for
+ * at least a tick.
*/
- delta = MSEC_TO_TICK_ROUNDUP(time_out);
- deltap = δ
+ deadline = deadline * NANOSEC / MILLISEC;
+ deadline = MAX(deadline, nsec_per_tick);
+ deadline += now;
}
if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
@@ -758,16 +762,15 @@
* Do not check for signals if we have a zero timeout.
*/
DP_REFRELE(dpep);
- if (time_out == 0)
+ if (deadline == 0)
return (0);
mutex_enter(&curthread->t_delay_lock);
- while ((delta = cv_relwaituntil_sig(
- &curthread->t_delay_cv, &curthread->t_delay_lock,
- deltap, TR_MILLISEC)) > 0) {
+ while ((error =
+ cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
+ &curthread->t_delay_lock, deadline)) > 0)
continue;
- }
mutex_exit(&curthread->t_delay_lock);
- return (delta == 0 ? EINTR : 0);
+ return (error == 0 ? EINTR : 0);
}
/*
@@ -814,21 +817,22 @@
/*
* Sleep until we are notified, signaled, or timed out.
- * Do not check for signals if we have a zero timeout.
*/
- if (time_out == 0) /* immediate timeout */
+ if (deadline == 0) {
+ /* immediate timeout; do not check signals */
break;
-
- delta = cv_relwaituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
- deltap, TR_MILLISEC);
+ }
+ error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+ &pcp->pc_lock, deadline);
/*
* If we were awakened by a signal or timeout
* then break the loop, else poll again.
*/
- if (delta <= 0) {
- if (delta == 0) /* signal */
- error = EINTR;
+ if (error <= 0) {
+ error = (error == 0) ? EINTR : 0;
break;
+ } else {
+ error = 0;
}
}
mutex_exit(&pcp->pc_lock);
--- a/usr/src/uts/common/os/condvar.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/os/condvar.c Thu Aug 30 05:13:49 2012 -0700
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
#include <sys/thread.h>
#include <sys/proc.h>
@@ -481,6 +483,21 @@
}
/*
+ * Wait until absolute hrtime tim; callers treat a return of 0 as EINTR.
+ * If tim == -1, waits without timeout using cv_wait_sig_swap().
+ */
+int
+cv_timedwait_sig_hrtime(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim)
+{
+ if (tim == -1) {
+ return (cv_wait_sig_swap(cvp, mp));
+ } else { /* tim is passed through with CALLOUT_FLAG_ABSOLUTE */
+ return (cv_timedwait_sig_hires(cvp, mp, tim, 1,
+ CALLOUT_FLAG_ABSOLUTE | CALLOUT_FLAG_ROUNDUP));
+ }
+}
+
+/*
* Same as cv_timedwait_sig() except that the third argument is a relative
* timeout value, as opposed to an absolute one. There is also a fourth
* argument that specifies how accurately the timeout must be implemented.
@@ -502,30 +519,6 @@
}
/*
- * Same as cv_reltimedwait_sig() except that the timeout is optional. If
- * there is no timeout then the function will block until woken up
- * or interrupted.
- */
-clock_t
-cv_relwaituntil_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t *delta,
- time_res_t res)
-{
- /*
- * If there is no timeout specified wait indefinitely for a
- * signal or a wakeup.
- */
- if (delta == NULL) {
- return (cv_wait_sig_swap(cvp, mp));
- }
-
- /*
- * cv_reltimedwait_sig will wait for the relative timeout
- * specified by delta.
- */
- return (cv_reltimedwait_sig(cvp, mp, *delta, res));
-}
-
-/*
* Like cv_wait_sig_swap but allows the caller to indicate (with a
* non-NULL sigret) that they will take care of signalling the cv
* after wakeup, if necessary. This is a vile hack that should only
@@ -766,6 +759,10 @@
* so the caller can return a premature timeout to the calling process
* so it can reevaluate the situation in light of the new system time.
* (The system clock has been reset if timecheck != timechanged.)
+ *
+ * Generally, cv_timedwait_sig_hrtime() should be used instead of this
+ * routine. It waits based on hrtime rather than wall-clock time and therefore
+ * does not need to deal with the time changing.
*/
int
cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp,
--- a/usr/src/uts/common/sys/condvar.h Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/sys/condvar.h Thu Aug 30 05:13:49 2012 -0700
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
/*
* condvar.h:
@@ -95,10 +97,9 @@
extern clock_t cv_reltimedwait(kcondvar_t *, kmutex_t *, clock_t, time_res_t);
extern int cv_wait_sig(kcondvar_t *, kmutex_t *);
extern clock_t cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
+extern int cv_timedwait_sig_hrtime(kcondvar_t *, kmutex_t *, hrtime_t);
extern clock_t cv_reltimedwait_sig(kcondvar_t *, kmutex_t *, clock_t,
time_res_t);
-extern clock_t cv_relwaituntil_sig(kcondvar_t *, kmutex_t *, clock_t *,
- time_res_t);
extern int cv_wait_sig_swap(kcondvar_t *, kmutex_t *);
extern int cv_wait_sig_swap_core(kcondvar_t *, kmutex_t *, int *);
extern void cv_signal(kcondvar_t *);
--- a/usr/src/uts/common/syscall/poll.c Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/syscall/poll.c Thu Aug 30 05:13:49 2012 -0700
@@ -24,11 +24,14 @@
* Use is subject to license terms.
*/
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
@@ -288,9 +291,7 @@
proc_t *p = ttoproc(t);
int fdcnt = 0;
int i;
- int imm_timeout = 0;
- clock_t *deltap = NULL;
- clock_t delta;
+ hrtime_t deadline; /* hrtime value when we want to return */
pollfd_t *pollfdp;
pollstate_t *ps;
pollcache_t *pcp;
@@ -301,24 +302,15 @@
/*
* Determine the precise future time of the requested timeout, if any.
*/
- if (tsp != NULL) {
- if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
- imm_timeout = 1;
- } else {
- /*
- * cv_relwaituntil_sig operates at
- * the tick granularity, which by default is 10 ms.
- * Convert the specified timespec to ticks, rounding
- * up to at least 1 tick to avoid flooding the
- * system with small high resolution timers.
- */
- delta = SEC_TO_TICK(tsp->tv_sec) +
- NSEC_TO_TICK(tsp->tv_nsec);
- if (delta < 1) {
- delta = 1;
- }
- deltap = δ
- }
+ if (tsp == NULL) {
+ deadline = -1; /* no timeout: cv_timedwait_sig_hrtime blocks */
+ } else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
+ deadline = 0; /* zero timeout: never sleep */
+ } else {
+ /* Widen first; the wait must be at least a tick. */
+ deadline = ((hrtime_t)tsp->tv_sec * NANOSEC) + tsp->tv_nsec;
+ deadline = MAX(deadline, nsec_per_tick);
+ deadline += gethrtime(); /* relative -> absolute hrtime */
}
/*
@@ -351,16 +343,15 @@
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
- * Do not check for signals if we have a zero timeout.
+ * Do not check for signals if we do not want to wait.
*/
- if (!imm_timeout) {
+ if (deadline != 0) {
mutex_enter(&t->t_delay_lock);
- while ((delta = cv_relwaituntil_sig(&t->t_delay_cv,
- &t->t_delay_lock, deltap, TR_MILLISEC)) > 0)
+ while ((error = cv_timedwait_sig_hrtime(&t->t_delay_cv,
+ &t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
- if (delta == 0)
- error = EINTR;
+ error = (error == 0) ? EINTR : 0;
}
goto pollout;
}
@@ -550,20 +541,19 @@
* Do not check for signals if we have a zero timeout.
*/
mutex_exit(&ps->ps_lock);
- if (imm_timeout) {
- delta = -1;
+ if (deadline == 0) {
+ error = -1;
} else {
- delta = cv_relwaituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
- deltap, TR_MILLISEC);
+ error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+ &pcp->pc_lock, deadline);
}
mutex_exit(&pcp->pc_lock);
/*
* If we have received a signal or timed out
* then break out and return.
*/
- if (delta <= 0) {
- if (delta == 0)
- error = EINTR;
+ if (error <= 0) {
+ error = (error == 0) ? EINTR : 0;
break;
}
/*