author | Chris Kirby <Chris.Kirby@oracle.com> |
Thu, 10 Jun 2010 15:46:47 -0600 | |
changeset 12605 | 6790e683d5a5 |
parent 11146 | 7e58f40bcb1c |
child 13869 | 921a99998bb4 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
22 |
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#include <sys/zfs_context.h> |
|
27 |
#include <sys/vdev_impl.h> |
|
28 |
#include <sys/zio.h> |
|
29 |
#include <sys/avl.h> |
|
30 |
||
31 |
/*
 * These tunables are for performance analysis.
 */
/*
 * zfs_vdev_max_pending is the maximum number of i/os concurrently
 * pending to each device.  zfs_vdev_min_pending is the initial number
 * of i/os pending to each device (before it starts ramping up to
 * max_pending).
 *
 * In this file, min_pending is the limit applied when a new i/o is
 * queued (vdev_queue_io) and max_pending is the limit applied when an
 * i/o completes (vdev_queue_io_done).
 */
int zfs_vdev_max_pending = 10;
int zfs_vdev_min_pending = 4;

/* deadline = pri + (ddi_get_lbolt64() >> time_shift) */
int zfs_vdev_time_shift = 6;

/*
 * exponential I/O issue ramp-up rate: each completed i/o may issue up to
 * this many queued i/os (see the loop in vdev_queue_io_done).
 */
int zfs_vdev_ramp_rate = 2;

/*
 * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O.
 * For read I/Os, we also aggregate across small adjacency gaps; for writes
 * we include spans of optional I/Os to aid aggregation at the disk even when
 * they aren't able to help us aggregate at this level.
 *
 * zfs_vdev_aggregation_limit bounds the total span of one aggregate;
 * the two gap limits are expressed in bytes (32K for reads, 4K for writes).
 */
int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
int zfs_vdev_read_gap_limit = 32 << 10;
int zfs_vdev_write_gap_limit = 4 << 10;
3059 | 58 |
|
59 |
/* |
|
789 | 60 |
* Virtual device vector for disk I/O scheduling. |
61 |
*/ |
|
62 |
int |
|
63 |
vdev_queue_deadline_compare(const void *x1, const void *x2) |
|
64 |
{ |
|
65 |
const zio_t *z1 = x1; |
|
66 |
const zio_t *z2 = x2; |
|
67 |
||
68 |
if (z1->io_deadline < z2->io_deadline) |
|
69 |
return (-1); |
|
70 |
if (z1->io_deadline > z2->io_deadline) |
|
71 |
return (1); |
|
72 |
||
73 |
if (z1->io_offset < z2->io_offset) |
|
74 |
return (-1); |
|
75 |
if (z1->io_offset > z2->io_offset) |
|
76 |
return (1); |
|
77 |
||
78 |
if (z1 < z2) |
|
79 |
return (-1); |
|
80 |
if (z1 > z2) |
|
81 |
return (1); |
|
82 |
||
83 |
return (0); |
|
84 |
} |
|
85 |
||
86 |
int |
|
87 |
vdev_queue_offset_compare(const void *x1, const void *x2) |
|
88 |
{ |
|
89 |
const zio_t *z1 = x1; |
|
90 |
const zio_t *z2 = x2; |
|
91 |
||
92 |
if (z1->io_offset < z2->io_offset) |
|
93 |
return (-1); |
|
94 |
if (z1->io_offset > z2->io_offset) |
|
95 |
return (1); |
|
96 |
||
97 |
if (z1 < z2) |
|
98 |
return (-1); |
|
99 |
if (z1 > z2) |
|
100 |
return (1); |
|
101 |
||
102 |
return (0); |
|
103 |
} |
|
104 |
||
105 |
void |
|
106 |
vdev_queue_init(vdev_t *vd) |
|
107 |
{ |
|
108 |
vdev_queue_t *vq = &vd->vdev_queue; |
|
109 |
||
110 |
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); |
|
111 |
||
112 |
avl_create(&vq->vq_deadline_tree, vdev_queue_deadline_compare, |
|
113 |
sizeof (zio_t), offsetof(struct zio, io_deadline_node)); |
|
114 |
||
115 |
avl_create(&vq->vq_read_tree, vdev_queue_offset_compare, |
|
116 |
sizeof (zio_t), offsetof(struct zio, io_offset_node)); |
|
117 |
||
118 |
avl_create(&vq->vq_write_tree, vdev_queue_offset_compare, |
|
119 |
sizeof (zio_t), offsetof(struct zio, io_offset_node)); |
|
120 |
||
121 |
avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare, |
|
122 |
sizeof (zio_t), offsetof(struct zio, io_offset_node)); |
|
123 |
} |
|
124 |
||
125 |
void |
|
126 |
vdev_queue_fini(vdev_t *vd) |
|
127 |
{ |
|
128 |
vdev_queue_t *vq = &vd->vdev_queue; |
|
129 |
||
130 |
avl_destroy(&vq->vq_deadline_tree); |
|
131 |
avl_destroy(&vq->vq_read_tree); |
|
132 |
avl_destroy(&vq->vq_write_tree); |
|
133 |
avl_destroy(&vq->vq_pending_tree); |
|
134 |
||
135 |
mutex_destroy(&vq->vq_lock); |
|
136 |
} |
|
137 |
||
138 |
static void |
|
1544 | 139 |
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) |
140 |
{ |
|
141 |
avl_add(&vq->vq_deadline_tree, zio); |
|
142 |
avl_add(zio->io_vdev_tree, zio); |
|
143 |
} |
|
144 |
||
145 |
static void |
|
146 |
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) |
|
147 |
{ |
|
148 |
avl_remove(&vq->vq_deadline_tree, zio); |
|
149 |
avl_remove(zio->io_vdev_tree, zio); |
|
150 |
} |
|
151 |
||
152 |
static void |
|
789 | 153 |
vdev_queue_agg_io_done(zio_t *aio) |
154 |
{ |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
155 |
zio_t *pio; |
789 | 156 |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
157 |
while ((pio = zio_walk_parents(aio)) != NULL) |
789 | 158 |
if (aio->io_type == ZIO_TYPE_READ) |
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
159 |
bcopy((char *)aio->io_data + (pio->io_offset - |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
160 |
aio->io_offset), pio->io_data, pio->io_size); |
789 | 161 |
|
162 |
zio_buf_free(aio->io_data, aio->io_size); |
|
163 |
} |
|
164 |
||
8692
692d4668b40d
6801507 ZFS read aggregation should not mind the gap
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
8632
diff
changeset
|
165 |
/*
 * Compute the range spanned by two i/os, which is the endpoint of the last
 * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset).
 * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio);
 * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0.
 *
 * Note that IO_GAP() deliberately swaps its arguments when expanding
 * IO_SPAN(), and that both macros evaluate their arguments more than once,
 * so they must only be passed side-effect-free expressions.
 *
 * NOTE(review): callers compare the results against uint64_t limits;
 * presumably io_offset/io_size are unsigned, so an overlapping pair yields
 * a very large "gap" rather than a negative one -- confirm against zio_t.
 */
#define	IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
#define	IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
789 | 173 |
|
174 |
/*
 * Choose the next I/O to issue from vq, aggregating it with its neighbours
 * where possible.
 *
 * The caller must hold vq->vq_lock.  The lock is held on return, but it may
 * be dropped and re-acquired internally when a data-less I/O is discarded.
 *
 * vq            - the queue to pull from.
 * pending_limit - no I/O is issued while the pending tree already holds at
 *                 least this many entries.
 *
 * Returns NULL if the pending limit has been reached or nothing is queued.
 * Otherwise returns either a single queued zio or a newly created aggregate
 * zio (recognizable by io_done == vdev_queue_agg_io_done); in both cases the
 * returned zio has already been added to vq_pending_tree.
 */
static zio_t *
vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
{
	/*
	 * fio/lio: first and last I/O of the candidate aggregation range.
	 * mio:     last non-optional I/O seen in that range.
	 * dio/nio: cursors used while walking the offset tree.
	 * aio:     the aggregate I/O, if one is built.
	 */
	zio_t *fio, *lio, *aio, *dio, *nio, *mio;
	avl_tree_t *t;
	int flags;
	uint64_t maxspan = zfs_vdev_aggregation_limit;
	uint64_t maxgap;
	int stretch;

again:
	ASSERT(MUTEX_HELD(&vq->vq_lock));

	if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit ||
	    avl_numnodes(&vq->vq_deadline_tree) == 0)
		return (NULL);

	/* Start from the I/O with the earliest deadline. */
	fio = lio = avl_first(&vq->vq_deadline_tree);

	t = fio->io_vdev_tree;
	flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
	/* Only reads may aggregate across gaps; writes must be adjacent. */
	maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;

	if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
		/*
		 * We can aggregate I/Os that are sufficiently adjacent and of
		 * the same flavor, as expressed by the AGG_INHERIT flags.
		 * The latter requirement is necessary so that certain
		 * attributes of the I/O, such as whether it's a normal I/O
		 * or a scrub/resilver, can be preserved in the aggregate.
		 * We can include optional I/Os, but don't allow them
		 * to begin a range as they add no benefit in that situation.
		 */

		/*
		 * We keep track of the last non-optional I/O.
		 */
		mio = (fio->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : fio;

		/*
		 * Walk backwards through sufficiently contiguous I/Os
		 * recording the last non-optional I/O.
		 */
		while ((dio = AVL_PREV(t, fio)) != NULL &&
		    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
		    IO_SPAN(dio, lio) <= maxspan &&
		    IO_GAP(dio, fio) <= maxgap) {
			fio = dio;
			if (mio == NULL && !(fio->io_flags & ZIO_FLAG_OPTIONAL))
				mio = fio;
		}

		/*
		 * Skip any initial optional I/Os.
		 */
		while ((fio->io_flags & ZIO_FLAG_OPTIONAL) && fio != lio) {
			fio = AVL_NEXT(t, fio);
			ASSERT(fio != NULL);
		}

		/*
		 * Walk forward through sufficiently contiguous I/Os.
		 */
		while ((dio = AVL_NEXT(t, lio)) != NULL &&
		    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
		    IO_SPAN(fio, dio) <= maxspan &&
		    IO_GAP(lio, dio) <= maxgap) {
			lio = dio;
			if (!(lio->io_flags & ZIO_FLAG_OPTIONAL))
				mio = lio;
		}

		/*
		 * Now that we've established the range of the I/O aggregation
		 * we must decide what to do with trailing optional I/Os.
		 * For reads, there's nothing to do. While we are unable to
		 * aggregate further, it's possible that a trailing optional
		 * I/O would allow the underlying device to aggregate with
		 * subsequent I/Os. We must therefore determine if the next
		 * non-optional I/O is close enough to make aggregation
		 * worthwhile.
		 */
		stretch = B_FALSE;
		if (t != &vq->vq_read_tree && mio != NULL) {
			/*
			 * Look past lio through strictly adjacent I/Os that
			 * stay within the write gap limit of mio; finding a
			 * non-optional one means stretching is worthwhile.
			 */
			nio = lio;
			while ((dio = AVL_NEXT(t, nio)) != NULL &&
			    IO_GAP(nio, dio) == 0 &&
			    IO_GAP(mio, dio) <= zfs_vdev_write_gap_limit) {
				nio = dio;
				if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) {
					stretch = B_TRUE;
					break;
				}
			}
		}

		if (stretch) {
			/* This may be a no-op. */
			VERIFY((dio = AVL_NEXT(t, lio)) != NULL);
			dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
		} else {
			/* Trim trailing optional I/Os off the range. */
			while (lio != mio && lio != fio) {
				ASSERT(lio->io_flags & ZIO_FLAG_OPTIONAL);
				lio = AVL_PREV(t, lio);
				ASSERT(lio != NULL);
			}
		}
	}

	if (fio != lio) {
		/* More than one I/O in range: build a single aggregate. */
		uint64_t size = IO_SPAN(fio, lio);
		ASSERT(size <= zfs_vdev_aggregation_limit);

		aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
		    zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
		    flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
		    vdev_queue_agg_io_done, NULL);

		/*
		 * Visit every I/O from fio through lio.  The successor is
		 * fetched before dio is removed from the tree.
		 */
		nio = fio;
		do {
			dio = nio;
			nio = AVL_NEXT(t, dio);
			ASSERT(dio->io_type == aio->io_type);
			ASSERT(dio->io_vdev_tree == t);

			/*
			 * Fill the aggregate's buffer for writes: zeroes for
			 * data-less I/Os, the caller's data otherwise.  Reads
			 * are copied out later in vdev_queue_agg_io_done().
			 */
			if (dio->io_flags & ZIO_FLAG_NODATA) {
				ASSERT(dio->io_type == ZIO_TYPE_WRITE);
				bzero((char *)aio->io_data + (dio->io_offset -
				    aio->io_offset), dio->io_size);
			} else if (dio->io_type == ZIO_TYPE_WRITE) {
				bcopy(dio->io_data, (char *)aio->io_data +
				    (dio->io_offset - aio->io_offset),
				    dio->io_size);
			}

			/*
			 * Link dio to the aggregate, dequeue it, and let it
			 * proceed without issuing its own device I/O.
			 */
			zio_add_child(dio, aio);
			vdev_queue_io_remove(vq, dio);
			zio_vdev_io_bypass(dio);
			zio_execute(dio);
		} while (dio != lio);

		avl_add(&vq->vq_pending_tree, aio);

		return (aio);
	}

	/* Nothing to aggregate with: issue fio on its own. */
	ASSERT(fio->io_vdev_tree == t);
	vdev_queue_io_remove(vq, fio);

	/*
	 * If the I/O is or was optional and therefore has no data, we need to
	 * simply discard it. We need to drop the vdev queue's lock to avoid a
	 * deadlock that we could encounter since this I/O will complete
	 * immediately.
	 */
	if (fio->io_flags & ZIO_FLAG_NODATA) {
		mutex_exit(&vq->vq_lock);
		zio_vdev_io_bypass(fio);
		zio_execute(fio);
		mutex_enter(&vq->vq_lock);
		/* The queue may have changed while unlocked; start over. */
		goto again;
	}

	avl_add(&vq->vq_pending_tree, fio);

	return (fio);
}
|
341 |
||
342 |
/*
 * Queue a read or write zio on its vdev and pick the I/O, if any, that the
 * caller should issue now.
 *
 * A zio that already carries ZIO_FLAG_DONT_QUEUE is handed straight back.
 * Otherwise the zio is stamped with a deadline, placed on the queue, and
 * the scheduler is consulted with zfs_vdev_min_pending as the limit.
 *
 * Returns the zio to issue, which need not be the one passed in, or NULL
 * if nothing should be issued by the caller.  An aggregate I/O is started
 * here with zio_nowait() and reported as NULL.
 */
zio_t *
vdev_queue_io(zio_t *zio)
{
	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
	zio_t *next;

	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);

	if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
		return (zio);

	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;

	/* Reads and writes live in separate offset-ordered trees. */
	zio->io_vdev_tree = (zio->io_type == ZIO_TYPE_READ) ?
	    &vq->vq_read_tree : &vq->vq_write_tree;

	mutex_enter(&vq->vq_lock);

	zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) +
	    zio->io_priority;
	vdev_queue_io_add(vq, zio);
	next = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending);

	mutex_exit(&vq->vq_lock);

	/* Aggregates are dispatched here rather than by the caller. */
	if (next != NULL && next->io_done == vdev_queue_agg_io_done) {
		zio_nowait(next);
		next = NULL;
	}

	return (next);
}
381 |
||
382 |
void |
|
383 |
vdev_queue_io_done(zio_t *zio) |
|
384 |
{ |
|
385 |
vdev_queue_t *vq = &zio->io_vd->vdev_queue; |
|
386 |
||
387 |
mutex_enter(&vq->vq_lock); |
|
388 |
||
389 |
avl_remove(&vq->vq_pending_tree, zio); |
|
390 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
391 |
for (int i = 0; i < zfs_vdev_ramp_rate; i++) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
392 |
zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending); |
789 | 393 |
if (nio == NULL) |
394 |
break; |
|
395 |
mutex_exit(&vq->vq_lock); |
|
5530 | 396 |
if (nio->io_done == vdev_queue_agg_io_done) { |
397 |
zio_nowait(nio); |
|
398 |
} else { |
|
789 | 399 |
zio_vdev_io_reissue(nio); |
5530 | 400 |
zio_execute(nio); |
401 |
} |
|
789 | 402 |
mutex_enter(&vq->vq_lock); |
403 |
} |
|
404 |
||
405 |
mutex_exit(&vq->vq_lock); |
|
406 |
} |