usr/src/uts/common/fs/zfs/vdev_queue.c
author Chris Kirby <Chris.Kirby@oracle.com>
Thu, 10 Jun 2010 15:46:47 -0600
changeset 12605 6790e683d5a5
parent 11146 7e58f40bcb1c
child 13869 921a99998bb4
permissions -rw-r--r--
6959846 DMU traverse prefetch size should be a global tunable
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     1
/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     2
 * CDDL HEADER START
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     3
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     4
 * The contents of this file are subject to the terms of the
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
     5
 * Common Development and Distribution License (the "License").
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
     6
 * You may not use this file except in compliance with the License.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     7
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     8
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     9
 * or http://www.opensolaris.org/os/licensing.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    10
 * See the License for the specific language governing permissions
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    11
 * and limitations under the License.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    12
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    13
 * When distributing Covered Code, include this CDDL HEADER in each
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    14
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    15
 * If applicable, add the following below this CDDL HEADER, with the
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    16
 * fields enclosed by brackets "[]" replaced with your own identifying
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    17
 * information: Portions Copyright [yyyy] [name of copyright owner]
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    18
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    19
 * CDDL HEADER END
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    20
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    21
/*
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
    22
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    23
 * Use is subject to license terms.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    24
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    25
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    26
#include <sys/zfs_context.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    27
#include <sys/vdev_impl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    28
#include <sys/zio.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    29
#include <sys/avl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    30
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    31
/*
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    32
 * These tunables are for performance analysis.
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    33
 */
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    34
/*
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    35
 * zfs_vdev_max_pending is the maximum number of i/os concurrently
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    36
 * pending to each device.  zfs_vdev_min_pending is the initial number
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    37
 * of i/os pending to each device (before it starts ramping up to
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    38
 * max_pending).
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    39
 */
10801
e0bf032e8673 6822816 assertion failed: zap_remove_int(ds_next_clones_obj) returns ENOENT
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents: 10105
diff changeset
    40
int zfs_vdev_max_pending = 10;
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    41
int zfs_vdev_min_pending = 4;
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    42
11066
cebb50cbe4f9 PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents: 10922
diff changeset
    43
/* deadline = pri + (ddi_get_lbolt64() >> time_shift) */
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    44
int zfs_vdev_time_shift = 6;
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    45
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    46
/* exponential I/O issue ramp-up rate */
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    47
int zfs_vdev_ramp_rate = 2;
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    48
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    49
/*
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
    50
 * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
    51
 * For read I/Os, we also aggregate across small adjacency gaps; for writes
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
    52
 * we include spans of optional I/Os to aid aggregation at the disk even when
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
    53
 * they aren't able to help us aggregate at this level.
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    54
 */
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    55
int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
    56
int zfs_vdev_read_gap_limit = 32 << 10;
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
    57
int zfs_vdev_write_gap_limit = 4 << 10;
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    58
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
    59
/*
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    60
 * Virtual device vector for disk I/O scheduling.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    61
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    62
int
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    63
vdev_queue_deadline_compare(const void *x1, const void *x2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    64
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    65
	const zio_t *z1 = x1;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    66
	const zio_t *z2 = x2;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    67
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    68
	if (z1->io_deadline < z2->io_deadline)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    69
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    70
	if (z1->io_deadline > z2->io_deadline)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    71
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    72
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    73
	if (z1->io_offset < z2->io_offset)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    74
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    75
	if (z1->io_offset > z2->io_offset)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    76
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    77
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    78
	if (z1 < z2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    79
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    80
	if (z1 > z2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    81
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    82
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    83
	return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    84
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    85
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    86
int
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    87
vdev_queue_offset_compare(const void *x1, const void *x2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    88
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    89
	const zio_t *z1 = x1;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    90
	const zio_t *z2 = x2;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    91
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    92
	if (z1->io_offset < z2->io_offset)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    93
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    94
	if (z1->io_offset > z2->io_offset)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    95
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    96
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    97
	if (z1 < z2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    98
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    99
	if (z1 > z2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   100
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   101
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   102
	return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   103
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   104
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   105
void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   106
vdev_queue_init(vdev_t *vd)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   107
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   108
	vdev_queue_t *vq = &vd->vdev_queue;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   109
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   110
	mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   111
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   112
	avl_create(&vq->vq_deadline_tree, vdev_queue_deadline_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   113
	    sizeof (zio_t), offsetof(struct zio, io_deadline_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   114
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   115
	avl_create(&vq->vq_read_tree, vdev_queue_offset_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   116
	    sizeof (zio_t), offsetof(struct zio, io_offset_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   117
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   118
	avl_create(&vq->vq_write_tree, vdev_queue_offset_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   119
	    sizeof (zio_t), offsetof(struct zio, io_offset_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   120
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   121
	avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   122
	    sizeof (zio_t), offsetof(struct zio, io_offset_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   123
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   124
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   125
void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   126
vdev_queue_fini(vdev_t *vd)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   127
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   128
	vdev_queue_t *vq = &vd->vdev_queue;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   129
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   130
	avl_destroy(&vq->vq_deadline_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   131
	avl_destroy(&vq->vq_read_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   132
	avl_destroy(&vq->vq_write_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   133
	avl_destroy(&vq->vq_pending_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   134
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   135
	mutex_destroy(&vq->vq_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   136
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   137
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   138
static void
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   139
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   140
{
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   141
	avl_add(&vq->vq_deadline_tree, zio);
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   142
	avl_add(zio->io_vdev_tree, zio);
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   143
}
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   144
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   145
static void
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   146
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   147
{
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   148
	avl_remove(&vq->vq_deadline_tree, zio);
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   149
	avl_remove(zio->io_vdev_tree, zio);
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   150
}
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   151
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   152
static void
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   153
vdev_queue_agg_io_done(zio_t *aio)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   154
{
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   155
	zio_t *pio;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   156
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   157
	while ((pio = zio_walk_parents(aio)) != NULL)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   158
		if (aio->io_type == ZIO_TYPE_READ)
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   159
			bcopy((char *)aio->io_data + (pio->io_offset -
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   160
			    aio->io_offset), pio->io_data, pio->io_size);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   161
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   162
	zio_buf_free(aio->io_data, aio->io_size);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   163
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   164
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   165
/*
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   166
 * Compute the range spanned by two i/os, which is the endpoint of the last
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   167
 * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset).
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   168
 * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio);
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   169
 * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0.
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   170
 */
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   171
#define	IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   172
#define	IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   173
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   174
static zio_t *
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   175
vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   176
{
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   177
	zio_t *fio, *lio, *aio, *dio, *nio, *mio;
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   178
	avl_tree_t *t;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   179
	int flags;
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   180
	uint64_t maxspan = zfs_vdev_aggregation_limit;
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   181
	uint64_t maxgap;
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   182
	int stretch;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   183
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   184
again:
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   185
	ASSERT(MUTEX_HELD(&vq->vq_lock));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   186
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   187
	if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit ||
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   188
	    avl_numnodes(&vq->vq_deadline_tree) == 0)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   189
		return (NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   190
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   191
	fio = lio = avl_first(&vq->vq_deadline_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   192
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   193
	t = fio->io_vdev_tree;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   194
	flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   195
	maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   196
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   197
	if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   198
		/*
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   199
		 * We can aggregate I/Os that are sufficiently adjacent and of
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   200
		 * the same flavor, as expressed by the AGG_INHERIT flags.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   201
		 * The latter requirement is necessary so that certain
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   202
		 * attributes of the I/O, such as whether it's a normal I/O
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   203
		 * or a scrub/resilver, can be preserved in the aggregate.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   204
		 * We can include optional I/Os, but don't allow them
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   205
		 * to begin a range as they add no benefit in that situation.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   206
		 */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   207
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   208
		/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   209
		 * We keep track of the last non-optional I/O.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   210
		 */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   211
		mio = (fio->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : fio;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   212
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   213
		/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   214
		 * Walk backwards through sufficiently contiguous I/Os
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   215
		 * recording the last non-optional I/O.
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   216
		 */
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   217
		while ((dio = AVL_PREV(t, fio)) != NULL &&
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   218
		    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   219
		    IO_SPAN(dio, lio) <= maxspan &&
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   220
		    IO_GAP(dio, fio) <= maxgap) {
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   221
			fio = dio;
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   222
			if (mio == NULL && !(fio->io_flags & ZIO_FLAG_OPTIONAL))
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   223
				mio = fio;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   224
		}
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   225
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   226
		/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   227
		 * Skip any initial optional I/Os.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   228
		 */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   229
		while ((fio->io_flags & ZIO_FLAG_OPTIONAL) && fio != lio) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   230
			fio = AVL_NEXT(t, fio);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   231
			ASSERT(fio != NULL);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   232
		}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   233
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   234
		/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   235
		 * Walk forward through sufficiently contiguous I/Os.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   236
		 */
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   237
		while ((dio = AVL_NEXT(t, lio)) != NULL &&
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   238
		    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   239
		    IO_SPAN(fio, dio) <= maxspan &&
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   240
		    IO_GAP(lio, dio) <= maxgap) {
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   241
			lio = dio;
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   242
			if (!(lio->io_flags & ZIO_FLAG_OPTIONAL))
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   243
				mio = lio;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   244
		}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   245
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   246
		/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   247
		 * Now that we've established the range of the I/O aggregation
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   248
		 * we must decide what to do with trailing optional I/Os.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   249
		 * For reads, there's nothing to do. While we are unable to
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   250
		 * aggregate further, it's possible that a trailing optional
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   251
		 * I/O would allow the underlying device to aggregate with
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   252
		 * subsequent I/Os. We must therefore determine if the next
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   253
		 * non-optional I/O is close enough to make aggregation
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   254
		 * worthwhile.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   255
		 */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   256
		stretch = B_FALSE;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   257
		if (t != &vq->vq_read_tree && mio != NULL) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   258
			nio = lio;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   259
			while ((dio = AVL_NEXT(t, nio)) != NULL &&
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   260
			    IO_GAP(nio, dio) == 0 &&
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   261
			    IO_GAP(mio, dio) <= zfs_vdev_write_gap_limit) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   262
				nio = dio;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   263
				if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   264
					stretch = B_TRUE;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   265
					break;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   266
				}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   267
			}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   268
		}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   269
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   270
		if (stretch) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   271
			/* This may be a no-op. */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   272
			VERIFY((dio = AVL_NEXT(t, lio)) != NULL);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   273
			dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   274
		} else {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   275
			while (lio != mio && lio != fio) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   276
				ASSERT(lio->io_flags & ZIO_FLAG_OPTIONAL);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   277
				lio = AVL_PREV(t, lio);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   278
				ASSERT(lio != NULL);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   279
			}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   280
		}
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   281
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   282
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   283
	if (fio != lio) {
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   284
		uint64_t size = IO_SPAN(fio, lio);
3059
7d69dbccfcbb 6472021 vdev knobs can not be turned
ahrens
parents: 1807
diff changeset
   285
		ASSERT(size <= zfs_vdev_aggregation_limit);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   286
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
   287
		aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   288
		    zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7754
diff changeset
   289
		    flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   290
		    vdev_queue_agg_io_done, NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   291
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   292
		nio = fio;
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   293
		do {
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   294
			dio = nio;
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   295
			nio = AVL_NEXT(t, dio);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   296
			ASSERT(dio->io_type == aio->io_type);
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   297
			ASSERT(dio->io_vdev_tree == t);
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   298
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   299
			if (dio->io_flags & ZIO_FLAG_NODATA) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   300
				ASSERT(dio->io_type == ZIO_TYPE_WRITE);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   301
				bzero((char *)aio->io_data + (dio->io_offset -
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   302
				    aio->io_offset), dio->io_size);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   303
			} else if (dio->io_type == ZIO_TYPE_WRITE) {
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   304
				bcopy(dio->io_data, (char *)aio->io_data +
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   305
				    (dio->io_offset - aio->io_offset),
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   306
				    dio->io_size);
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   307
			}
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   308
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   309
			zio_add_child(dio, aio);
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   310
			vdev_queue_io_remove(vq, dio);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   311
			zio_vdev_io_bypass(dio);
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   312
			zio_execute(dio);
8692
692d4668b40d 6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 8632
diff changeset
   313
		} while (dio != lio);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   314
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   315
		avl_add(&vq->vq_pending_tree, aio);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   316
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   317
		return (aio);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   318
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   319
8632
36ef517870a3 6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents: 8241
diff changeset
   320
	ASSERT(fio->io_vdev_tree == t);
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
   321
	vdev_queue_io_remove(vq, fio);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   322
10105
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   323
	/*
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   324
	 * If the I/O is or was optional and therefore has no data, we need to
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   325
	 * simply discard it. We need to drop the vdev queue's lock to avoid a
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   326
	 * deadlock that we could encounter since this I/O will complete
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   327
	 * immediately.
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   328
	 */
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   329
	if (fio->io_flags & ZIO_FLAG_NODATA) {
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   330
		mutex_exit(&vq->vq_lock);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   331
		zio_vdev_io_bypass(fio);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   332
		zio_execute(fio);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   333
		mutex_enter(&vq->vq_lock);
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   334
		goto again;
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   335
	}
17811c723fb4 6854612 triple-parity RAID-Z
Adam Leventhal <adam.leventhal@sun.com>
parents: 8692
diff changeset
   336
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   337
	avl_add(&vq->vq_pending_tree, fio);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   338
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   339
	return (fio);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   340
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   341
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   342
/*
 * Place a read or write zio on its vdev's queue and select the next I/O
 * to issue.
 *
 * A zio that already carries ZIO_FLAG_DONT_QUEUE is handed straight back
 * to the caller untouched.  Otherwise the zio is flagged
 * DONT_CACHE | DONT_QUEUE, bound to the queue's read or write tree
 * according to its type, given a deadline, and added to the queue; the
 * deadline assignment, the add and the selection all happen under vq_lock.
 *
 * Returns:
 *   - zio itself, if it was already marked ZIO_FLAG_DONT_QUEUE;
 *   - NULL, if vdev_queue_io_to_issue() selected nothing, or if it
 *     selected an aggregate I/O (recognized by its io_done callback being
 *     vdev_queue_agg_io_done), which is dispatched here via zio_nowait();
 *   - otherwise the selected zio.
 */
zio_t *
vdev_queue_io(zio_t *zio)
{
	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
	zio_t *next;

	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);

	if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
		return (zio);

	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
	zio->io_vdev_tree = (zio->io_type == ZIO_TYPE_READ) ?
	    &vq->vq_read_tree : &vq->vq_write_tree;

	mutex_enter(&vq->vq_lock);
	zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) +
	    zio->io_priority;
	vdev_queue_io_add(vq, zio);
	next = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending);
	mutex_exit(&vq->vq_lock);

	/*
	 * An aggregate I/O is dispatched from here (after vq_lock has been
	 * dropped) rather than being returned to the caller.
	 */
	if (next != NULL && next->io_done == vdev_queue_agg_io_done) {
		zio_nowait(next);
		next = NULL;
	}

	return (next);
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   381
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   382
void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   383
vdev_queue_io_done(zio_t *zio)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   384
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   385
	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   386
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   387
	mutex_enter(&vq->vq_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   388
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   389
	avl_remove(&vq->vq_pending_tree, zio);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   390
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
   391
	for (int i = 0; i < zfs_vdev_ramp_rate; i++) {
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
   392
		zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   393
		if (nio == NULL)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   394
			break;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   395
		mutex_exit(&vq->vq_lock);
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   396
		if (nio->io_done == vdev_queue_agg_io_done) {
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   397
			zio_nowait(nio);
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   398
		} else {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   399
			zio_vdev_io_reissue(nio);
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   400
			zio_execute(nio);
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 3697
diff changeset
   401
		}
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   402
		mutex_enter(&vq->vq_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   403
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   404
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   405
	mutex_exit(&vq->vq_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   406
}