author | Joshua M. Clulow <jmc@joyent.com> |
Mon, 04 Mar 2013 23:52:56 +0000 | |
changeset 14188 | afe390b9f1e0 |
parent 14167 | dceb17481b99 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
22 |
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
25 |
/* |
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
26 |
* Copyright (c) 2013 by Delphix. All rights reserved. |
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
27 |
*/ |
789 | 28 |
|
29 |
#include <sys/zfs_context.h> |
|
30 |
#include <sys/spa.h> |
|
31 |
#include <sys/vdev_impl.h> |
|
32 |
#include <sys/zio.h> |
|
5810 | 33 |
#include <sys/kstat.h> |
789 | 34 |
|
35 |
/* |
|
36 |
* Virtual device read-ahead caching. |
|
37 |
* |
|
38 |
* This file implements a simple LRU read-ahead cache. When the DMU reads |
|
39 |
* a given block, it will often want other, nearby blocks soon thereafter. |
|
40 |
* We take advantage of this by reading a larger disk region and caching |
|
5810 | 41 |
* the result. In the best case, this can turn 128 back-to-back 512-byte |
42 |
* reads into a single 64k read followed by 127 cache hits; this reduces |
|
789 | 43 |
* latency dramatically. In the worst case, it can turn an isolated 512-byte |
5810 | 44 |
* read into a 64k read, which doesn't affect latency all that much but is |
789 | 45 |
* terribly wasteful of bandwidth. A more intelligent version of the cache |
46 |
* could keep track of access patterns and not do read-ahead unless it sees |
|
4634
39bfb9e90d34
6437054 vdev_cache wises up: increase DB performance by 16%
ek110237
parents:
4451
diff
changeset
|
47 |
* at least two temporally close I/Os to the same region. Currently, only |
39bfb9e90d34
6437054 vdev_cache wises up: increase DB performance by 16%
ek110237
parents:
4451
diff
changeset
|
48 |
* metadata I/O is inflated. A further enhancement could take advantage of |
39bfb9e90d34
6437054 vdev_cache wises up: increase DB performance by 16%
ek110237
parents:
4451
diff
changeset
|
49 |
* more semantic information about the I/O. And it could use something |
39bfb9e90d34
6437054 vdev_cache wises up: increase DB performance by 16%
ek110237
parents:
4451
diff
changeset
|
50 |
* faster than an AVL tree; that was chosen solely for convenience. |
789 | 51 |
* |
52 |
* There are five cache operations: allocate, fill, read, write, evict. |
|
53 |
* |
|
54 |
* (1) Allocate. This reserves a cache entry for the specified region. |
|
55 |
* We separate the allocate and fill operations so that multiple threads |
|
56 |
* don't generate I/O for the same cache miss. |
|
57 |
* |
|
58 |
* (2) Fill. When the I/O for a cache miss completes, the fill routine |
|
59 |
* places the data in the previously allocated cache entry. |
|
60 |
* |
|
61 |
* (3) Read. Read data from the cache. |
|
62 |
* |
|
63 |
* (4) Write. Update cache contents after write completion. |
|
64 |
* |
|
65 |
* (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry |
|
3059 | 66 |
* if the total cache size exceeds zfs_vdev_cache_size. |
67 |
*/ |
|
68 |
||
69 |
/* |
|
70 |
* These tunables are for performance analysis. |
|
789 | 71 |
*/ |
3059 | 72 |
/* |
73 |
* All i/os smaller than zfs_vdev_cache_max will be turned into |
|
74 |
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software |
|
5810 | 75 |
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each |
3059 | 76 |
* vdev's vdev_cache. |
13346
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
77 |
* |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
78 |
* TODO: Note that with the current ZFS code, it turns out that the |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
79 |
* vdev cache is not helpful, and in some cases actually harmful. It |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
80 |
* is better if we disable this. Once some time has passed, we should |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
81 |
* actually remove this to simplify the code. For now we just disable |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
82 |
* it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11 |
f3ce1af7c12d
175 zfs vdev cache consumes excessive memory
Garrett D'Amore <garrett@nexenta.com>
parents:
11066
diff
changeset
|
83 |
* has made these same changes. |
3059 | 84 |
*/ |
5810 | 85 |
int zfs_vdev_cache_max = 1 << 14;	/* 16KB; larger reads skip the cache */
int zfs_vdev_cache_size = 0;		/* 0: vdev_cache_allocate() refuses */
int zfs_vdev_cache_bshift = 16;		/* log2 of the cache line size */

/* Size in bytes of one cache line; follows zfs_vdev_cache_bshift. */
#define	VCBS	(1 << zfs_vdev_cache_bshift)	/* 64KB */
90 |
||
91 |
kstat_t *vdc_ksp = NULL; |
|
92 |
||
93 |
typedef struct vdc_stats { |
|
94 |
kstat_named_t vdc_stat_delegations; |
|
95 |
kstat_named_t vdc_stat_hits; |
|
96 |
kstat_named_t vdc_stat_misses; |
|
97 |
} vdc_stats_t; |
|
98 |
||
99 |
static vdc_stats_t vdc_stats = { |
|
100 |
{ "delegations", KSTAT_DATA_UINT64 }, |
|
101 |
{ "hits", KSTAT_DATA_UINT64 }, |
|
102 |
{ "misses", KSTAT_DATA_UINT64 } |
|
103 |
}; |
|
104 |
||
105 |
/*
 * Atomically add one to the named counter in vdc_stats.
 *
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macro behaves as a single statement and is safe
 * inside an unbraced if/else.
 */
#define	VDCSTAT_BUMP(stat)	atomic_add_64(&vdc_stats.stat.value.ui64, 1)
|
789 | 106 |
|
107 |
static int |
|
108 |
vdev_cache_offset_compare(const void *a1, const void *a2) |
|
109 |
{ |
|
110 |
const vdev_cache_entry_t *ve1 = a1; |
|
111 |
const vdev_cache_entry_t *ve2 = a2; |
|
112 |
||
113 |
if (ve1->ve_offset < ve2->ve_offset) |
|
114 |
return (-1); |
|
115 |
if (ve1->ve_offset > ve2->ve_offset) |
|
116 |
return (1); |
|
117 |
return (0); |
|
118 |
} |
|
119 |
||
120 |
static int |
|
121 |
vdev_cache_lastused_compare(const void *a1, const void *a2) |
|
122 |
{ |
|
123 |
const vdev_cache_entry_t *ve1 = a1; |
|
124 |
const vdev_cache_entry_t *ve2 = a2; |
|
125 |
||
126 |
if (ve1->ve_lastused < ve2->ve_lastused) |
|
127 |
return (-1); |
|
128 |
if (ve1->ve_lastused > ve2->ve_lastused) |
|
129 |
return (1); |
|
130 |
||
131 |
/* |
|
132 |
* Among equally old entries, sort by offset to ensure uniqueness. |
|
133 |
*/ |
|
134 |
return (vdev_cache_offset_compare(a1, a2)); |
|
135 |
} |
|
136 |
||
137 |
/* |
|
138 |
* Evict the specified entry from the cache. |
|
139 |
*/ |
|
140 |
static void |
|
141 |
vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve) |
|
142 |
{ |
|
143 |
ASSERT(MUTEX_HELD(&vc->vc_lock)); |
|
144 |
ASSERT(ve->ve_fill_io == NULL); |
|
145 |
ASSERT(ve->ve_data != NULL); |
|
146 |
||
147 |
avl_remove(&vc->vc_lastused_tree, ve); |
|
148 |
avl_remove(&vc->vc_offset_tree, ve); |
|
3059 | 149 |
zio_buf_free(ve->ve_data, VCBS); |
789 | 150 |
kmem_free(ve, sizeof (vdev_cache_entry_t)); |
151 |
} |
|
152 |
||
153 |
/* |
|
154 |
* Allocate an entry in the cache. At the point we don't have the data, |
|
155 |
* we're just creating a placeholder so that multiple threads don't all |
|
156 |
* go off and read the same blocks. |
|
157 |
*/ |
|
158 |
static vdev_cache_entry_t * |
|
159 |
vdev_cache_allocate(zio_t *zio) |
|
160 |
{ |
|
161 |
vdev_cache_t *vc = &zio->io_vd->vdev_cache; |
|
3059 | 162 |
uint64_t offset = P2ALIGN(zio->io_offset, VCBS); |
789 | 163 |
vdev_cache_entry_t *ve; |
164 |
||
165 |
ASSERT(MUTEX_HELD(&vc->vc_lock)); |
|
166 |
||
3059 | 167 |
if (zfs_vdev_cache_size == 0) |
789 | 168 |
return (NULL); |
169 |
||
170 |
/* |
|
171 |
* If adding a new entry would exceed the cache size, |
|
172 |
* evict the oldest entry (LRU). |
|
173 |
*/ |
|
3059 | 174 |
if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) > |
175 |
zfs_vdev_cache_size) { |
|
789 | 176 |
ve = avl_first(&vc->vc_lastused_tree); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5810
diff
changeset
|
177 |
if (ve->ve_fill_io != NULL) |
789 | 178 |
return (NULL); |
179 |
ASSERT(ve->ve_hits != 0); |
|
180 |
vdev_cache_evict(vc, ve); |
|
181 |
} |
|
182 |
||
183 |
ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); |
|
184 |
ve->ve_offset = offset; |
|
11066
cebb50cbe4f9
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents:
8632
diff
changeset
|
185 |
ve->ve_lastused = ddi_get_lbolt(); |
3059 | 186 |
ve->ve_data = zio_buf_alloc(VCBS); |
789 | 187 |
|
188 |
avl_add(&vc->vc_offset_tree, ve); |
|
189 |
avl_add(&vc->vc_lastused_tree, ve); |
|
190 |
||
191 |
return (ve); |
|
192 |
} |
|
193 |
||
194 |
static void |
|
195 |
vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) |
|
196 |
{ |
|
3059 | 197 |
uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); |
789 | 198 |
|
199 |
ASSERT(MUTEX_HELD(&vc->vc_lock)); |
|
200 |
ASSERT(ve->ve_fill_io == NULL); |
|
201 |
||
11066
cebb50cbe4f9
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents:
8632
diff
changeset
|
202 |
if (ve->ve_lastused != ddi_get_lbolt()) { |
789 | 203 |
avl_remove(&vc->vc_lastused_tree, ve); |
11066
cebb50cbe4f9
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents:
8632
diff
changeset
|
204 |
ve->ve_lastused = ddi_get_lbolt(); |
789 | 205 |
avl_add(&vc->vc_lastused_tree, ve); |
206 |
} |
|
207 |
||
208 |
ve->ve_hits++; |
|
209 |
bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size); |
|
210 |
} |
|
211 |
||
212 |
/* |
|
213 |
* Fill a previously allocated cache entry with data. |
|
214 |
*/ |
|
215 |
static void |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
216 |
vdev_cache_fill(zio_t *fio) |
789 | 217 |
{ |
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
218 |
vdev_t *vd = fio->io_vd; |
789 | 219 |
vdev_cache_t *vc = &vd->vdev_cache; |
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
220 |
vdev_cache_entry_t *ve = fio->io_private; |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
221 |
zio_t *pio; |
789 | 222 |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
223 |
ASSERT(fio->io_size == VCBS); |
789 | 224 |
|
225 |
/* |
|
226 |
* Add data to the cache. |
|
227 |
*/ |
|
228 |
mutex_enter(&vc->vc_lock); |
|
229 |
||
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
230 |
ASSERT(ve->ve_fill_io == fio); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
231 |
ASSERT(ve->ve_offset == fio->io_offset); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
232 |
ASSERT(ve->ve_data == fio->io_data); |
789 | 233 |
|
234 |
ve->ve_fill_io = NULL; |
|
235 |
||
236 |
/* |
|
237 |
* Even if this cache line was invalidated by a missed write update, |
|
238 |
* any reads that were queued up before the missed update are still |
|
239 |
* valid, so we can satisfy them from this line before we evict it. |
|
240 |
*/ |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
241 |
while ((pio = zio_walk_parents(fio)) != NULL) |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
242 |
vdev_cache_hit(vc, ve, pio); |
789 | 243 |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
244 |
if (fio->io_error || ve->ve_missed_update) |
789 | 245 |
vdev_cache_evict(vc, ve); |
246 |
||
247 |
mutex_exit(&vc->vc_lock); |
|
248 |
} |
|
249 |
||
250 |
/* |
|
251 |
* Read data from the cache. Returns 0 on cache hit, errno on a miss. |
|
252 |
*/ |
|
253 |
int |
|
254 |
vdev_cache_read(zio_t *zio) |
|
255 |
{ |
|
256 |
vdev_cache_t *vc = &zio->io_vd->vdev_cache; |
|
257 |
vdev_cache_entry_t *ve, ve_search; |
|
3059 | 258 |
uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS); |
259 |
uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); |
|
789 | 260 |
zio_t *fio; |
261 |
||
262 |
ASSERT(zio->io_type == ZIO_TYPE_READ); |
|
263 |
||
264 |
if (zio->io_flags & ZIO_FLAG_DONT_CACHE) |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
265 |
return (SET_ERROR(EINVAL)); |
789 | 266 |
|
3059 | 267 |
if (zio->io_size > zfs_vdev_cache_max) |
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
268 |
return (SET_ERROR(EOVERFLOW)); |
789 | 269 |
|
270 |
/* |
|
271 |
* If the I/O straddles two or more cache blocks, don't cache it. |
|
272 |
*/ |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
273 |
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) |
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
274 |
return (SET_ERROR(EXDEV)); |
789 | 275 |
|
3059 | 276 |
ASSERT(cache_phase + zio->io_size <= VCBS); |
789 | 277 |
|
278 |
mutex_enter(&vc->vc_lock); |
|
279 |
||
280 |
ve_search.ve_offset = cache_offset; |
|
281 |
ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL); |
|
282 |
||
283 |
if (ve != NULL) { |
|
284 |
if (ve->ve_missed_update) { |
|
285 |
mutex_exit(&vc->vc_lock); |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
286 |
return (SET_ERROR(ESTALE)); |
789 | 287 |
} |
288 |
||
289 |
if ((fio = ve->ve_fill_io) != NULL) { |
|
290 |
zio_vdev_io_bypass(zio); |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
291 |
zio_add_child(zio, fio); |
789 | 292 |
mutex_exit(&vc->vc_lock); |
5810 | 293 |
VDCSTAT_BUMP(vdc_stat_delegations); |
789 | 294 |
return (0); |
295 |
} |
|
296 |
||
297 |
vdev_cache_hit(vc, ve, zio); |
|
298 |
zio_vdev_io_bypass(zio); |
|
299 |
||
300 |
mutex_exit(&vc->vc_lock); |
|
5810 | 301 |
VDCSTAT_BUMP(vdc_stat_hits); |
789 | 302 |
return (0); |
303 |
} |
|
304 |
||
305 |
ve = vdev_cache_allocate(zio); |
|
306 |
||
307 |
if (ve == NULL) { |
|
308 |
mutex_exit(&vc->vc_lock); |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13346
diff
changeset
|
309 |
return (SET_ERROR(ENOMEM)); |
789 | 310 |
} |
311 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5810
diff
changeset
|
312 |
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, |
14167
dceb17481b99
4045 zfs write throttle & i/o scheduler performance work
Matthew Ahrens <mahrens@delphix.com>
parents:
13980
diff
changeset
|
313 |
ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5810
diff
changeset
|
314 |
ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve); |
789 | 315 |
|
316 |
ve->ve_fill_io = fio; |
|
317 |
zio_vdev_io_bypass(zio); |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
7837
diff
changeset
|
318 |
zio_add_child(zio, fio); |
789 | 319 |
|
320 |
mutex_exit(&vc->vc_lock); |
|
321 |
zio_nowait(fio); |
|
5810 | 322 |
VDCSTAT_BUMP(vdc_stat_misses); |
789 | 323 |
|
324 |
return (0); |
|
325 |
} |
|
326 |
||
327 |
/* |
|
328 |
* Update cache contents upon write completion. |
|
329 |
*/ |
|
330 |
void |
|
331 |
vdev_cache_write(zio_t *zio) |
|
332 |
{ |
|
333 |
vdev_cache_t *vc = &zio->io_vd->vdev_cache; |
|
334 |
vdev_cache_entry_t *ve, ve_search; |
|
335 |
uint64_t io_start = zio->io_offset; |
|
336 |
uint64_t io_end = io_start + zio->io_size; |
|
3059 | 337 |
uint64_t min_offset = P2ALIGN(io_start, VCBS); |
338 |
uint64_t max_offset = P2ROUNDUP(io_end, VCBS); |
|
789 | 339 |
avl_index_t where; |
340 |
||
341 |
ASSERT(zio->io_type == ZIO_TYPE_WRITE); |
|
342 |
||
343 |
mutex_enter(&vc->vc_lock); |
|
344 |
||
345 |
ve_search.ve_offset = min_offset; |
|
346 |
ve = avl_find(&vc->vc_offset_tree, &ve_search, &where); |
|
347 |
||
348 |
if (ve == NULL) |
|
349 |
ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER); |
|
350 |
||
351 |
while (ve != NULL && ve->ve_offset < max_offset) { |
|
352 |
uint64_t start = MAX(ve->ve_offset, io_start); |
|
3059 | 353 |
uint64_t end = MIN(ve->ve_offset + VCBS, io_end); |
789 | 354 |
|
355 |
if (ve->ve_fill_io != NULL) { |
|
356 |
ve->ve_missed_update = 1; |
|
357 |
} else { |
|
358 |
bcopy((char *)zio->io_data + start - io_start, |
|
359 |
ve->ve_data + start - ve->ve_offset, end - start); |
|
360 |
} |
|
361 |
ve = AVL_NEXT(&vc->vc_offset_tree, ve); |
|
362 |
} |
|
363 |
mutex_exit(&vc->vc_lock); |
|
364 |
} |
|
365 |
||
366 |
void |
|
4451 | 367 |
vdev_cache_purge(vdev_t *vd) |
368 |
{ |
|
369 |
vdev_cache_t *vc = &vd->vdev_cache; |
|
370 |
vdev_cache_entry_t *ve; |
|
371 |
||
372 |
mutex_enter(&vc->vc_lock); |
|
373 |
while ((ve = avl_first(&vc->vc_offset_tree)) != NULL) |
|
374 |
vdev_cache_evict(vc, ve); |
|
375 |
mutex_exit(&vc->vc_lock); |
|
376 |
} |
|
377 |
||
378 |
void |
|
789 | 379 |
vdev_cache_init(vdev_t *vd) |
380 |
{ |
|
381 |
vdev_cache_t *vc = &vd->vdev_cache; |
|
382 |
||
383 |
mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL); |
|
384 |
||
385 |
avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare, |
|
386 |
sizeof (vdev_cache_entry_t), |
|
387 |
offsetof(struct vdev_cache_entry, ve_offset_node)); |
|
388 |
||
389 |
avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare, |
|
390 |
sizeof (vdev_cache_entry_t), |
|
391 |
offsetof(struct vdev_cache_entry, ve_lastused_node)); |
|
392 |
} |
|
393 |
||
394 |
void |
|
395 |
vdev_cache_fini(vdev_t *vd) |
|
396 |
{ |
|
397 |
vdev_cache_t *vc = &vd->vdev_cache; |
|
398 |
||
4451 | 399 |
vdev_cache_purge(vd); |
789 | 400 |
|
401 |
avl_destroy(&vc->vc_offset_tree); |
|
402 |
avl_destroy(&vc->vc_lastused_tree); |
|
403 |
||
404 |
mutex_destroy(&vc->vc_lock); |
|
405 |
} |
|
5810 | 406 |
|
407 |
void |
|
408 |
vdev_cache_stat_init(void) |
|
409 |
{ |
|
410 |
vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc", |
|
411 |
KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t), |
|
412 |
KSTAT_FLAG_VIRTUAL); |
|
413 |
if (vdc_ksp != NULL) { |
|
414 |
vdc_ksp->ks_data = &vdc_stats; |
|
415 |
kstat_install(vdc_ksp); |
|
416 |
} |
|
417 |
} |
|
418 |
||
419 |
void |
|
420 |
vdev_cache_stat_fini(void) |
|
421 |
{ |
|
422 |
if (vdc_ksp != NULL) { |
|
423 |
kstat_delete(vdc_ksp); |
|
424 |
vdc_ksp = NULL; |
|
425 |
} |
|
426 |
} |