|
1 /* |
|
2 * CDDL HEADER START |
|
3 * |
|
4 * The contents of this file are subject to the terms of the |
|
5 * Common Development and Distribution License, Version 1.0 only |
|
6 * (the "License"). You may not use this file except in compliance |
|
7 * with the License. |
|
8 * |
|
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 * or http://www.opensolaris.org/os/licensing. |
|
11 * See the License for the specific language governing permissions |
|
12 * and limitations under the License. |
|
13 * |
|
14 * When distributing Covered Code, include this CDDL HEADER in each |
|
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 * If applicable, add the following below this CDDL HEADER, with the |
|
17 * fields enclosed by brackets "[]" replaced with your own identifying |
|
18 * information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 * |
|
20 * CDDL HEADER END |
|
21 */ |
|
22 /* |
|
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
24 * Use is subject to license terms. |
|
25 */ |
|
26 |
|
27 #pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
|
29 #include <sys/zfs_context.h> |
|
30 #include <sys/spa.h> |
|
31 #include <sys/spa_impl.h> |
|
32 #include <sys/dmu.h> |
|
33 #include <sys/dmu_tx.h> |
|
34 #include <sys/vdev_impl.h> |
|
35 #include <sys/uberblock_impl.h> |
|
36 #include <sys/metaslab.h> |
|
37 #include <sys/metaslab_impl.h> |
|
38 #include <sys/space_map.h> |
|
39 #include <sys/zio.h> |
|
40 #include <sys/zap.h> |
|
41 #include <sys/fs/zfs.h> |
|
42 |
|
43 /* |
|
44 * Virtual device management. |
|
45 */ |
|
46 |
|
/*
 * Table of all known vdev types, searched by vdev_getops().
 * NULL-terminated so callers can scan without a count.
 */
static vdev_ops_t *vdev_ops_table[] = {
	&vdev_root_ops,
	&vdev_raidz_ops,
	&vdev_mirror_ops,
	&vdev_replacing_ops,
	&vdev_disk_ops,
	&vdev_file_ops,
	&vdev_missing_ops,
	NULL	/* sentinel: terminates the scan in vdev_getops() */
};
|
57 |
|
58 /* |
|
59 * Given a vdev type, return the appropriate ops vector. |
|
60 */ |
|
61 static vdev_ops_t * |
|
62 vdev_getops(const char *type) |
|
63 { |
|
64 vdev_ops_t *ops, **opspp; |
|
65 |
|
66 for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) |
|
67 if (strcmp(ops->vdev_op_type, type) == 0) |
|
68 break; |
|
69 |
|
70 return (ops); |
|
71 } |
|
72 |
|
73 /* |
|
74 * Default asize function: return the MAX of psize with the asize of |
|
75 * all children. This is what's used by anything other than RAID-Z. |
|
76 */ |
|
77 uint64_t |
|
78 vdev_default_asize(vdev_t *vd, uint64_t psize) |
|
79 { |
|
80 uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_ashift); |
|
81 uint64_t csize; |
|
82 uint64_t c; |
|
83 |
|
84 for (c = 0; c < vd->vdev_children; c++) { |
|
85 csize = vdev_psize_to_asize(vd->vdev_child[c], psize); |
|
86 asize = MAX(asize, csize); |
|
87 } |
|
88 |
|
89 return (asize); |
|
90 } |
|
91 |
|
92 vdev_t * |
|
93 vdev_lookup_top(spa_t *spa, uint64_t vdev) |
|
94 { |
|
95 vdev_t *rvd = spa->spa_root_vdev; |
|
96 |
|
97 if (vdev < rvd->vdev_children) |
|
98 return (rvd->vdev_child[vdev]); |
|
99 |
|
100 return (NULL); |
|
101 } |
|
102 |
|
103 vdev_t * |
|
104 vdev_lookup_by_path(vdev_t *vd, const char *path) |
|
105 { |
|
106 int c; |
|
107 vdev_t *mvd; |
|
108 |
|
109 if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) |
|
110 return (vd); |
|
111 |
|
112 for (c = 0; c < vd->vdev_children; c++) |
|
113 if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != |
|
114 NULL) |
|
115 return (mvd); |
|
116 |
|
117 return (NULL); |
|
118 } |
|
119 |
|
120 vdev_t * |
|
121 vdev_lookup_by_guid(vdev_t *vd, uint64_t guid) |
|
122 { |
|
123 int c; |
|
124 vdev_t *mvd; |
|
125 |
|
126 if (vd->vdev_children == 0 && vd->vdev_guid == guid) |
|
127 return (vd); |
|
128 |
|
129 for (c = 0; c < vd->vdev_children; c++) |
|
130 if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) != |
|
131 NULL) |
|
132 return (mvd); |
|
133 |
|
134 return (NULL); |
|
135 } |
|
136 |
|
/*
 * Attach child vdev 'cvd' to parent 'pvd' at slot cvd->vdev_id,
 * growing the parent's child array if needed and propagating the
 * child's guid sum up through all ancestors.  A NULL parent is
 * allowed (the child simply becomes parentless/top-of-tree).
 * Caller must hold the spa config lock as writer.
 */
void
vdev_add_child(vdev_t *pvd, vdev_t *cvd)
{
	size_t oldsize, newsize;
	uint64_t id = cvd->vdev_id;
	vdev_t **newchild;

	ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER));
	ASSERT(cvd->vdev_parent == NULL);

	cvd->vdev_parent = pvd;

	if (pvd == NULL)
		return;

	/* The target slot must not already be occupied. */
	ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL);

	oldsize = pvd->vdev_children * sizeof (vdev_t *);
	pvd->vdev_children = MAX(pvd->vdev_children, id + 1);
	newsize = pvd->vdev_children * sizeof (vdev_t *);

	/*
	 * Always reallocate the child array (even if the count did not
	 * grow), copying over the old entries; kmem_zalloc() leaves any
	 * new slots NULL.
	 */
	newchild = kmem_zalloc(newsize, KM_SLEEP);
	if (pvd->vdev_child != NULL) {
		bcopy(pvd->vdev_child, newchild, oldsize);
		kmem_free(pvd->vdev_child, oldsize);
	}

	pvd->vdev_child = newchild;
	pvd->vdev_child[id] = cvd;

	/* Inherit the parent's top-level vdev; a root child is its own top. */
	cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top : cvd);
	ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL);

	/*
	 * Walk up all ancestors to update guid sum.
	 */
	for (; pvd != NULL; pvd = pvd->vdev_parent)
		pvd->vdev_guid_sum += cvd->vdev_guid_sum;
}
|
176 |
|
/*
 * Detach child vdev 'cvd' from parent 'pvd', freeing the parent's
 * child array if this was the last child, and subtracting the child's
 * guid sum from all ancestors.  Inverse of vdev_add_child().
 */
void
vdev_remove_child(vdev_t *pvd, vdev_t *cvd)
{
	int c;
	uint_t id = cvd->vdev_id;

	ASSERT(cvd->vdev_parent == pvd);

	if (pvd == NULL)
		return;

	ASSERT(id < pvd->vdev_children);
	ASSERT(pvd->vdev_child[id] == cvd);

	pvd->vdev_child[id] = NULL;
	cvd->vdev_parent = NULL;

	/* Scan for any remaining children; 'c' ends at vdev_children if none. */
	for (c = 0; c < pvd->vdev_children; c++)
		if (pvd->vdev_child[c])
			break;

	if (c == pvd->vdev_children) {
		/* Last child removed: release the array entirely. */
		kmem_free(pvd->vdev_child, c * sizeof (vdev_t *));
		pvd->vdev_child = NULL;
		pvd->vdev_children = 0;
	}

	/*
	 * Walk up all ancestors to update guid sum.
	 */
	for (; pvd != NULL; pvd = pvd->vdev_parent)
		pvd->vdev_guid_sum -= cvd->vdev_guid_sum;
}
|
210 |
|
211 /* |
|
212 * Remove any holes in the child array. |
|
213 */ |
|
/*
 * Remove any holes in the child array.
 *
 * Rebuilds pvd->vdev_child with only the non-NULL entries, renumbering
 * each surviving child's vdev_id to its new (dense) index.  Caller must
 * hold the spa config lock as writer.
 */
void
vdev_compact_children(vdev_t *pvd)
{
	vdev_t **newchild, *cvd;
	int oldc = pvd->vdev_children;
	int newc, c;

	ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER));

	/* First pass: count surviving children. */
	for (c = newc = 0; c < oldc; c++)
		if (pvd->vdev_child[c])
			newc++;

	newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP);

	/* Second pass: pack survivors and reassign their ids. */
	for (c = newc = 0; c < oldc; c++) {
		if ((cvd = pvd->vdev_child[c]) != NULL) {
			newchild[newc] = cvd;
			cvd->vdev_id = newc++;
		}
	}

	kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *));
	pvd->vdev_child = newchild;
	pvd->vdev_children = newc;
}
|
240 |
|
241 /* |
|
242 * Allocate and minimally initialize a vdev_t. |
|
243 */ |
|
/*
 * Allocate and minimally initialize a vdev_t.
 *
 * If 'guid' is 0, a random nonzero guid is generated.  Initializes
 * the vdev's locks, I/O lists, DTL space maps, and per-txg lists;
 * the new vdev starts in the CLOSED state with no parent or children.
 */
static vdev_t *
vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
{
	vdev_t *vd;

	/* Loop guards against spa_get_random() returning 0. */
	while (guid == 0)
		guid = spa_get_random(-1ULL);

	vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP);

	vd->vdev_spa = spa;
	vd->vdev_id = id;
	vd->vdev_guid = guid;
	/* A lone vdev's guid sum is just its own guid. */
	vd->vdev_guid_sum = guid;
	vd->vdev_ops = ops;
	vd->vdev_state = VDEV_STATE_CLOSED;

	mutex_init(&vd->vdev_io_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vd->vdev_io_cv, NULL, CV_DEFAULT, NULL);
	list_create(&vd->vdev_io_pending, sizeof (zio_t),
	    offsetof(zio_t, io_pending));
	mutex_init(&vd->vdev_dirty_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
	/* Both DTL maps cover the full txg range and share vdev_dtl_lock. */
	space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock);
	space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock);
	txg_list_create(&vd->vdev_ms_list,
	    offsetof(struct metaslab, ms_txg_node));
	txg_list_create(&vd->vdev_dtl_list,
	    offsetof(struct vdev, vdev_dtl_node));
	vd->vdev_stat.vs_timestamp = gethrtime();

	return (vd);
}
|
277 |
|
278 /* |
|
279 * Free a vdev_t that has been removed from service. |
|
280 */ |
|
/*
 * Free a vdev_t that has been removed from service.
 *
 * Tears down everything vdev_alloc_common() set up, in reverse order:
 * strings, txg lists, DTL space maps (vacated under the DTL lock
 * before destruction), then the locks themselves.
 */
static void
vdev_free_common(vdev_t *vd)
{
	if (vd->vdev_path)
		spa_strfree(vd->vdev_path);
	if (vd->vdev_devid)
		spa_strfree(vd->vdev_devid);

	txg_list_destroy(&vd->vdev_ms_list);
	txg_list_destroy(&vd->vdev_dtl_list);
	mutex_enter(&vd->vdev_dtl_lock);
	/* Empty each map before destroying it. */
	space_map_vacate(&vd->vdev_dtl_map, NULL, NULL);
	space_map_destroy(&vd->vdev_dtl_map);
	space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
	space_map_destroy(&vd->vdev_dtl_scrub);
	mutex_exit(&vd->vdev_dtl_lock);
	mutex_destroy(&vd->vdev_dtl_lock);
	mutex_destroy(&vd->vdev_dirty_lock);
	list_destroy(&vd->vdev_io_pending);
	mutex_destroy(&vd->vdev_io_lock);
	cv_destroy(&vd->vdev_io_cv);

	kmem_free(vd, sizeof (vdev_t));
}
|
305 |
|
306 /* |
|
307 * Allocate a new vdev. The 'alloctype' is used to control whether we are |
|
308 * creating a new vdev or loading an existing one - the behavior is slightly |
|
309 * different for each case. |
|
310 */ |
|
/*
 * Allocate a new vdev.  The 'alloctype' is used to control whether we are
 * creating a new vdev or loading an existing one - the behavior is slightly
 * different for each case.
 *
 * Returns NULL if the nvlist config is missing required fields, names an
 * unknown vdev type, or (for VDEV_ALLOC_LOAD) carries an id that does not
 * match the expected 'id'.  On success the new vdev is already linked
 * under 'parent'.  Caller must hold the spa config lock as writer.
 */
vdev_t *
vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
{
	vdev_ops_t *ops;
	char *type;
	uint64_t guid = 0;
	vdev_t *vd;

	ASSERT(spa_config_held(spa, RW_WRITER));

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (NULL);

	if ((ops = vdev_getops(type)) == NULL)
		return (NULL);

	/*
	 * If this is a load, get the vdev guid from the nvlist.
	 * Otherwise, vdev_alloc_common() will generate one for us.
	 */
	if (alloctype == VDEV_ALLOC_LOAD) {
		uint64_t label_id;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
		    label_id != id)
			return (NULL);

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
			return (NULL);
	}

	vd = vdev_alloc_common(spa, id, guid, ops);

	/*
	 * The lookups return pointers into the nvlist; duplicate them so
	 * the vdev owns its own copies (freed via spa_strfree() later).
	 */
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
		vd->vdev_path = spa_strdup(vd->vdev_path);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
		vd->vdev_devid = spa_strdup(vd->vdev_devid);

	/*
	 * If we're a top-level vdev, try to load the allocation parameters.
	 * These lookups are best-effort: missing keys leave the zeroed
	 * defaults from vdev_alloc_common().
	 */
	if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
		    &vd->vdev_ms_array);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
		    &vd->vdev_ms_shift);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
		    &vd->vdev_ashift);
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE,
		    &vd->vdev_asize);
	}

	/*
	 * If we're a leaf vdev, try to load the DTL object.
	 */
	if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) {
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
		    &vd->vdev_dtl.smo_object);
	}

	/*
	 * Add ourselves to the parent's list of children.
	 */
	vdev_add_child(parent, vd);

	return (vd);
}
|
378 |
|
/*
 * Free a vdev and (recursively) all of its children.  The vdev is
 * closed, removed from the dirty list and from its parent, and its
 * allocation state discarded before the memory is released.
 */
void
vdev_free(vdev_t *vd)
{
	int c;

	/*
	 * vdev_free() implies closing the vdev first.  This is simpler than
	 * trying to ensure complicated semantics for all callers.
	 */
	vdev_close(vd);

	/*
	 * It's possible to free a vdev that's been added to the dirty
	 * list when in the middle of spa_vdev_add().  Handle that case
	 * correctly here.
	 */
	if (vd->vdev_is_dirty)
		vdev_config_clean(vd);

	/*
	 * Free all children.
	 */
	for (c = 0; c < vd->vdev_children; c++)
		vdev_free(vd->vdev_child[c]);

	/* vdev_remove_child() on the last child NULLed the array. */
	ASSERT(vd->vdev_child == NULL);
	ASSERT(vd->vdev_guid_sum == vd->vdev_guid);

	/*
	 * Discard allocation state.
	 */
	if (vd == vd->vdev_top)
		vdev_metaslab_fini(vd);

	ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
	ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);

	/*
	 * Remove this vdev from its parent's child list.
	 */
	vdev_remove_child(vd->vdev_parent, vd);

	ASSERT(vd->vdev_parent == NULL);

	vdev_free_common(vd);
}
|
425 |
|
426 /* |
|
427 * Transfer top-level vdev state from svd to tvd. |
|
428 */ |
|
/*
 * Transfer top-level vdev state from svd to tvd.
 *
 * Moves all metaslab bookkeeping, space statistics, per-txg dirty
 * lists, and config-dirty status from the old top-level vdev 'svd'
 * to the new one 'tvd', zeroing the corresponding fields in 'svd'.
 * Used by vdev_add_parent()/vdev_remove_parent() when the identity
 * of the top-level vdev changes.
 */
static void
vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
{
	spa_t *spa = svd->vdev_spa;
	metaslab_t *msp;
	vdev_t *vd;
	int t;

	ASSERT(tvd == tvd->vdev_top);

	/* Metaslab geometry. */
	tvd->vdev_ms_array = svd->vdev_ms_array;
	tvd->vdev_ms_shift = svd->vdev_ms_shift;
	tvd->vdev_ms_count = svd->vdev_ms_count;

	svd->vdev_ms_array = 0;
	svd->vdev_ms_shift = 0;
	svd->vdev_ms_count = 0;

	/* Metaslab group/array ownership; repoint the group at tvd. */
	tvd->vdev_mg = svd->vdev_mg;
	tvd->vdev_mg->mg_vd = tvd;
	tvd->vdev_ms = svd->vdev_ms;
	tvd->vdev_smo = svd->vdev_smo;

	svd->vdev_mg = NULL;
	svd->vdev_ms = NULL;
	svd->vdev_smo = NULL;

	/* Space accounting. */
	tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc;
	tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space;

	svd->vdev_stat.vs_alloc = 0;
	svd->vdev_stat.vs_space = 0;

	/*
	 * Re-home everything queued against svd in each txg bucket:
	 * dirty metaslabs, dirty-DTL vdevs, the spa's per-txg vdev
	 * list entry, and the dirty flags themselves.
	 */
	for (t = 0; t < TXG_SIZE; t++) {
		while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
			(void) txg_list_add(&tvd->vdev_ms_list, msp, t);
		while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL)
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, t);
		if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t))
			(void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t);
		tvd->vdev_dirty[t] = svd->vdev_dirty[t];
		svd->vdev_dirty[t] = 0;
	}

	/* Config dirtiness follows the top-level identity. */
	if (svd->vdev_is_dirty) {
		vdev_config_clean(svd);
		vdev_config_dirty(tvd);
	}

	/* svd must have no I/O in flight to be safely demoted. */
	ASSERT(svd->vdev_io_retry == NULL);
	ASSERT(list_is_empty(&svd->vdev_io_pending));
}
|
481 |
|
482 static void |
|
483 vdev_top_update(vdev_t *tvd, vdev_t *vd) |
|
484 { |
|
485 int c; |
|
486 |
|
487 if (vd == NULL) |
|
488 return; |
|
489 |
|
490 vd->vdev_top = tvd; |
|
491 |
|
492 for (c = 0; c < vd->vdev_children; c++) |
|
493 vdev_top_update(tvd, vd->vdev_child[c]); |
|
494 } |
|
495 |
|
496 /* |
|
497 * Add a mirror/replacing vdev above an existing vdev. |
|
498 */ |
|
/*
 * Add a mirror/replacing vdev above an existing vdev.
 *
 * The new interior vdev 'mvd' takes cvd's slot under cvd's old parent,
 * and cvd becomes mvd's sole child.  mvd inherits cvd's asize/ashift/
 * state; if mvd becomes the top-level vdev, top-level state is moved
 * from cvd to mvd.  Caller must hold the spa config lock as writer.
 */
vdev_t *
vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
{
	spa_t *spa = cvd->vdev_spa;
	vdev_t *pvd = cvd->vdev_parent;
	vdev_t *mvd;

	ASSERT(spa_config_held(spa, RW_WRITER));

	/* guid 0: vdev_alloc_common() generates a fresh guid for mvd. */
	mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);
	vdev_remove_child(pvd, cvd);
	vdev_add_child(pvd, mvd);
	/* cvd becomes the first (and only) child of mvd. */
	cvd->vdev_id = mvd->vdev_children;
	vdev_add_child(mvd, cvd);
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	/* The interposed vdev mirrors cvd's geometry and state. */
	mvd->vdev_asize = cvd->vdev_asize;
	mvd->vdev_ashift = cvd->vdev_ashift;
	mvd->vdev_state = cvd->vdev_state;

	if (mvd == mvd->vdev_top)
		vdev_top_transfer(cvd, mvd);

	return (mvd);
}
|
524 |
|
525 /* |
|
526 * Remove a 1-way mirror/replacing vdev from the tree. |
|
527 */ |
|
/*
 * Remove a 1-way mirror/replacing vdev from the tree.
 *
 * Inverse of vdev_add_parent(): splices cvd into its grandparent in
 * place of the interior vdev 'mvd', transfers top-level state back to
 * cvd if needed, and frees mvd.  Caller must hold the spa config lock
 * as writer.
 */
void
vdev_remove_parent(vdev_t *cvd)
{
	vdev_t *mvd = cvd->vdev_parent;
	vdev_t *pvd = mvd->vdev_parent;

	ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER));

	/* Only a 1-way mirror or replacing vdev may be collapsed. */
	ASSERT(mvd->vdev_children == 1);
	ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
	    mvd->vdev_ops == &vdev_replacing_ops);

	vdev_remove_child(mvd, cvd);
	vdev_remove_child(pvd, mvd);
	/* cvd takes over mvd's slot under the grandparent. */
	cvd->vdev_id = mvd->vdev_id;
	vdev_add_child(pvd, cvd);
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	if (cvd == cvd->vdev_top)
		vdev_top_transfer(mvd, cvd);

	ASSERT(mvd->vdev_children == 0);
	vdev_free(mvd);
}
|
552 |
|
/*
 * (Re)initialize the metaslab arrays for top-level vdev 'vd', growing
 * them from oldc (current count) to newc (asize >> ms_shift) entries.
 * Existing metaslabs are carried over; new slots get fresh metaslabs
 * via metaslab_init().  When called at pool load time (txg == 0) with
 * no metaslab group yet, the on-disk space map objects are read in
 * from the vdev's metaslab array object first.
 */
void
vdev_metaslab_init(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	metaslab_class_t *mc = spa_metaslab_class_select(spa);
	uint64_t c;
	uint64_t oldc = vd->vdev_ms_count;
	uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
	space_map_obj_t *smo = vd->vdev_smo;	/* old array, freed below */
	metaslab_t **mspp = vd->vdev_ms;	/* old array, freed below */

	dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc);

	/* Metaslab count can only grow. */
	ASSERT(oldc <= newc);

	vd->vdev_smo = kmem_zalloc(newc * sizeof (*smo), KM_SLEEP);
	vd->vdev_ms = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);
	vd->vdev_ms_count = newc;

	if (vd->vdev_mg == NULL) {
		if (txg == 0) {
			dmu_buf_t *db;
			uint64_t *ms_array;

			ms_array = kmem_zalloc(newc * sizeof (uint64_t),
			    KM_SLEEP);

			/*
			 * Read the object numbers of all on-disk space
			 * maps for this vdev's metaslabs.
			 * NOTE(review): dmu_read()'s result is not
			 * checked here.
			 */
			dmu_read(spa->spa_meta_objset, vd->vdev_ms_array,
			    0, newc * sizeof (uint64_t), ms_array);

			for (c = 0; c < newc; c++) {
				/* 0 means no space map exists yet. */
				if (ms_array[c] == 0)
					continue;
				/* Copy the smo from the object's bonus buf. */
				db = dmu_bonus_hold(spa->spa_meta_objset,
				    ms_array[c]);
				dmu_buf_read(db);
				ASSERT3U(db->db_size, ==, sizeof (*smo));
				bcopy(db->db_data, &vd->vdev_smo[c],
				    db->db_size);
				ASSERT3U(vd->vdev_smo[c].smo_object, ==,
				    ms_array[c]);
				dmu_buf_rele(db);
			}
			kmem_free(ms_array, newc * sizeof (uint64_t));
		}
		vd->vdev_mg = metaslab_group_create(mc, vd);
	}

	/*
	 * Carry existing metaslabs over into the new arrays, repointing
	 * each metaslab's smo at its relocated space_map_obj_t.
	 */
	for (c = 0; c < oldc; c++) {
		vd->vdev_smo[c] = smo[c];
		vd->vdev_ms[c] = mspp[c];
		mspp[c]->ms_smo = &vd->vdev_smo[c];
	}

	/* Create metaslabs for the newly covered region. */
	for (c = oldc; c < newc; c++)
		metaslab_init(vd->vdev_mg, &vd->vdev_smo[c], &vd->vdev_ms[c],
		    c << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg);

	if (oldc != 0) {
		kmem_free(smo, oldc * sizeof (*smo));
		kmem_free(mspp, oldc * sizeof (*mspp));
	}

}
|
617 |
|
618 void |
|
619 vdev_metaslab_fini(vdev_t *vd) |
|
620 { |
|
621 uint64_t m; |
|
622 uint64_t count = vd->vdev_ms_count; |
|
623 |
|
624 if (vd->vdev_ms != NULL) { |
|
625 for (m = 0; m < count; m++) |
|
626 metaslab_fini(vd->vdev_ms[m]); |
|
627 kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); |
|
628 vd->vdev_ms = NULL; |
|
629 } |
|
630 |
|
631 if (vd->vdev_smo != NULL) { |
|
632 kmem_free(vd->vdev_smo, count * sizeof (space_map_obj_t)); |
|
633 vd->vdev_smo = NULL; |
|
634 } |
|
635 } |
|
636 |
|
637 /* |
|
638 * Prepare a virtual device for access. |
|
639 */ |
|
/*
 * Prepare a virtual device for access.
 *
 * Opens the device via its ops vector, computes its physical (psize)
 * and allocatable (asize) sizes, and validates them against any sizes
 * recorded from a previous open (alignment must not grow, the device
 * must not shrink; growth of a healthy device is accepted as dynamic
 * LUN expansion).  Sets vdev_state to HEALTHY/DEGRADED on success or
 * OFFLINE/CANT_OPEN (with vs_aux detail) on failure.
 *
 * Returns 0 on success, or an errno (ENXIO, EOVERFLOW, EINVAL, or the
 * error from the ops open routine) on failure.
 */
int
vdev_open(vdev_t *vd)
{
	int error;
	vdev_knob_t *vk;
	int c;
	uint64_t osize = 0;
	uint64_t asize, psize;
	uint64_t ashift = -1ULL;

	/* May only open a vdev that is not currently open. */
	ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
	    vd->vdev_state == VDEV_STATE_CANT_OPEN ||
	    vd->vdev_state == VDEV_STATE_OFFLINE);

	/* Decay any injected fault count; otherwise clear fault mode. */
	if (vd->vdev_fault_mode == VDEV_FAULT_COUNT)
		vd->vdev_fault_arg >>= 1;
	else
		vd->vdev_fault_mode = VDEV_FAULT_NONE;

	vd->vdev_stat.vs_aux = VDEV_AUX_NONE;

	/* Reset every tunable knob to its default, clamped to [min, max]. */
	for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
		uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset);

		*valp = vk->vk_default;
		*valp = MAX(*valp, vk->vk_min);
		*valp = MIN(*valp, vk->vk_max);
	}

	/* Leaf vdevs get a device cache and an I/O scheduling queue. */
	if (vd->vdev_ops->vdev_op_leaf) {
		vdev_cache_init(vd);
		vdev_queue_init(vd);
		vd->vdev_cache_active = B_TRUE;
	}

	if (vd->vdev_offline) {
		ASSERT(vd->vdev_children == 0);
		dprintf("OFFLINE: %s = ENXIO\n", vdev_description(vd));
		vd->vdev_state = VDEV_STATE_OFFLINE;
		return (ENXIO);
	}

	/* Type-specific open fills in osize and ashift. */
	error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);

	dprintf("%s = %d, osize %llu, state = %d\n",
	    vdev_description(vd), error, osize, vd->vdev_state);

	if (error) {
		dprintf("%s in %s failed to open, error %d, aux %d\n",
		    vdev_description(vd),
		    vdev_description(vd->vdev_parent),
		    error,
		    vd->vdev_stat.vs_aux);

		vd->vdev_state = VDEV_STATE_CANT_OPEN;
		return (error);
	}

	vd->vdev_state = VDEV_STATE_HEALTHY;

	/* Any non-healthy child degrades an interior vdev. */
	for (c = 0; c < vd->vdev_children; c++)
		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY)
			vd->vdev_state = VDEV_STATE_DEGRADED;

	/* Round the usable size down to a label-size multiple. */
	osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));

	if (vd->vdev_children == 0) {
		/* Leaf: reserve room for the front and back labels. */
		if (osize < SPA_MINDEVSIZE) {
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL;
			return (EOVERFLOW);
		}
		psize = osize;
		asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
	} else {
		/* Interior vdev: children already accounted for labels. */
		if (osize < SPA_MINDEVSIZE -
		    (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_TOO_SMALL;
			return (EOVERFLOW);
		}
		psize = 0;
		asize = osize;
	}

	vd->vdev_psize = psize;

	if (vd->vdev_asize == 0) {
		/*
		 * This is the first-ever open, so use the computed values.
		 */
		vd->vdev_asize = asize;
		vd->vdev_ashift = ashift;
	} else {
		/*
		 * Make sure the alignment requirement hasn't increased.
		 */
		if (ashift > vd->vdev_ashift) {
			dprintf("%s: ashift grew\n", vdev_description(vd));
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (EINVAL);
		}

		/*
		 * Make sure the device hasn't shrunk.
		 */
		if (asize < vd->vdev_asize) {
			dprintf("%s: device shrank\n", vdev_description(vd));
			vd->vdev_state = VDEV_STATE_CANT_OPEN;
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (EINVAL);
		}

		/*
		 * If all children are healthy and the asize has increased,
		 * then we've experienced dynamic LUN growth.
		 */
		if (vd->vdev_state == VDEV_STATE_HEALTHY &&
		    asize > vd->vdev_asize) {
			dprintf("%s: device grew\n", vdev_description(vd));
			vd->vdev_asize = asize;
		}
	}

	return (0);
}
|
767 |
|
768 /* |
|
769 * Close a virtual device. |
|
770 */ |
|
771 void |
|
772 vdev_close(vdev_t *vd) |
|
773 { |
|
774 ASSERT3P(list_head(&vd->vdev_io_pending), ==, NULL); |
|
775 |
|
776 vd->vdev_ops->vdev_op_close(vd); |
|
777 |
|
778 if (vd->vdev_cache_active) { |
|
779 vdev_cache_fini(vd); |
|
780 vdev_queue_fini(vd); |
|
781 vd->vdev_cache_active = B_FALSE; |
|
782 } |
|
783 |
|
784 if (vd->vdev_offline) |
|
785 vd->vdev_state = VDEV_STATE_OFFLINE; |
|
786 else |
|
787 vd->vdev_state = VDEV_STATE_CLOSED; |
|
788 } |
|
789 |
|
/*
 * Close and reopen a vdev (or, for the root vdev, every top-level
 * vdev).  Waits for pending I/O to drain first.  If 'rq' is non-NULL,
 * the vdev's retry queue is handed back to the caller and cleared;
 * 'rq' must be NULL when reopening the root.  After the reopen, the
 * root vdev's state is recomputed as the minimum of its children's.
 */
void
vdev_reopen(vdev_t *vd, zio_t **rq)
{
	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
	int c;

	/* Reopening the root means reopening each top-level vdev. */
	if (vd == rvd) {
		ASSERT(rq == NULL);
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_reopen(rvd->vdev_child[c], NULL);
		return;
	}

	/* only valid for top-level vdevs */
	ASSERT3P(vd, ==, vd->vdev_top);

	/*
	 * vdev_state can change when spa_config_lock is held as writer,
	 * or when it's held as reader and we're doing a vdev_reopen().
	 * To handle the latter case, we grab rvd's io_lock to serialize
	 * reopens.  This ensures that there's never more than one vdev
	 * state changer active at a time.
	 */
	mutex_enter(&rvd->vdev_io_lock);

	mutex_enter(&vd->vdev_io_lock);
	/* Drain all pending I/O before closing. */
	while (list_head(&vd->vdev_io_pending) != NULL)
		cv_wait(&vd->vdev_io_cv, &vd->vdev_io_lock);
	vdev_close(vd);
	/* Open failure is reflected in vdev_state, not returned. */
	(void) vdev_open(vd);
	if (rq != NULL) {
		/* Hand the retry queue to the caller. */
		*rq = vd->vdev_io_retry;
		vd->vdev_io_retry = NULL;
	}
	mutex_exit(&vd->vdev_io_lock);

	/*
	 * Reassess root vdev's health.
	 */
	rvd->vdev_state = VDEV_STATE_HEALTHY;
	for (c = 0; c < rvd->vdev_children; c++) {
		uint64_t state = rvd->vdev_child[c]->vdev_state;
		rvd->vdev_state = MIN(rvd->vdev_state, state);
	}

	mutex_exit(&rvd->vdev_io_lock);
}
|
837 |
|
838 int |
|
839 vdev_create(vdev_t *vd, uint64_t txg) |
|
840 { |
|
841 int error; |
|
842 |
|
843 /* |
|
844 * Normally, partial opens (e.g. of a mirror) are allowed. |
|
845 * For a create, however, we want to fail the request if |
|
846 * there are any components we can't open. |
|
847 */ |
|
848 error = vdev_open(vd); |
|
849 |
|
850 if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { |
|
851 vdev_close(vd); |
|
852 return (error ? error : ENXIO); |
|
853 } |
|
854 |
|
855 /* |
|
856 * Recursively initialize all labels. |
|
857 */ |
|
858 if ((error = vdev_label_init(vd, txg)) != 0) { |
|
859 vdev_close(vd); |
|
860 return (error); |
|
861 } |
|
862 |
|
863 return (0); |
|
864 } |
|
865 |
|
866 /* |
|
867 * The is the latter half of vdev_create(). It is distinct because it |
|
868 * involves initiating transactions in order to do metaslab creation. |
|
869 * For creation, we want to try to create all vdevs at once and then undo it |
|
870 * if anything fails; this is much harder if we have pending transactions. |
|
871 */ |
|
872 void |
|
873 vdev_init(vdev_t *vd, uint64_t txg) |
|
874 { |
|
875 /* |
|
876 * Aim for roughly 200 metaslabs per vdev. |
|
877 */ |
|
878 vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); |
|
879 vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); |
|
880 |
|
881 /* |
|
882 * Initialize the vdev's metaslabs. |
|
883 */ |
|
884 vdev_metaslab_init(vd, txg); |
|
885 } |
|
886 |
|
/*
 * Mark vd's top-level vdev dirty for 'txg' with the given flags,
 * adding it to the spa's per-txg vdev list the first time any flag
 * for that txg is newly set.  Serialized by the top-level vdev's
 * dirty lock.
 */
void
vdev_dirty(vdev_t *vd, uint8_t flags, uint64_t txg)
{
	vdev_t *tvd = vd->vdev_top;

	mutex_enter(&tvd->vdev_dirty_lock);
	/* Only enqueue when at least one requested flag is not yet set. */
	if ((tvd->vdev_dirty[txg & TXG_MASK] & flags) != flags) {
		tvd->vdev_dirty[txg & TXG_MASK] |= flags;
		(void) txg_list_add(&tvd->vdev_spa->spa_vdev_txg_list,
		    tvd, txg);
	}
	mutex_exit(&tvd->vdev_dirty_lock);
}
|
900 |
|
901 void |
|
902 vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size) |
|
903 { |
|
904 mutex_enter(sm->sm_lock); |
|
905 if (!space_map_contains(sm, txg, size)) |
|
906 space_map_add(sm, txg, size); |
|
907 mutex_exit(sm->sm_lock); |
|
908 } |
|
909 |
|
910 int |
|
911 vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size) |
|
912 { |
|
913 int dirty; |
|
914 |
|
915 /* |
|
916 * Quick test without the lock -- covers the common case that |
|
917 * there are no dirty time segments. |
|
918 */ |
|
919 if (sm->sm_space == 0) |
|
920 return (0); |
|
921 |
|
922 mutex_enter(sm->sm_lock); |
|
923 dirty = space_map_contains(sm, txg, size); |
|
924 mutex_exit(sm->sm_lock); |
|
925 |
|
926 return (dirty); |
|
927 } |
|
928 |
|
929 /* |
|
930 * Reassess DTLs after a config change or scrub completion. |
|
931 */ |
|
/*
 * Reassess DTLs after a config change or scrub completion.
 *
 * For a leaf vdev: excise everything below scrub_txg from the DTL and
 * fold back in what the scrub could not fix; if scrub_done, empty the
 * scrub map.  A nonzero 'txg' additionally marks the top-level vdev
 * dirty so the updated DTL gets synced.  For an interior vdev: vacate
 * its own maps, recurse into the children, and rebuild its maps as the
 * union of the children's.  Caller must hold the spa config lock as
 * writer.
 */
void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
{
	int c;

	ASSERT(spa_config_held(vd->vdev_spa, RW_WRITER));

	if (vd->vdev_children == 0) {
		mutex_enter(&vd->vdev_dtl_lock);
		/*
		 * We're successfully scrubbed everything up to scrub_txg.
		 * Therefore, excise all old DTLs up to that point, then
		 * fold in the DTLs for everything we couldn't scrub.
		 */
		if (scrub_txg != 0) {
			space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg);
			space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub);
		}
		if (scrub_done)
			space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
		mutex_exit(&vd->vdev_dtl_lock);
		if (txg != 0) {
			/* Queue the DTL for syncing in this txg. */
			vdev_t *tvd = vd->vdev_top;
			vdev_dirty(tvd, VDD_DTL, txg);
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, txg);
		}
		return;
	}

	/* Interior vdev: rebuild both maps from scratch. */
	mutex_enter(&vd->vdev_dtl_lock);
	space_map_vacate(&vd->vdev_dtl_map, NULL, NULL);
	space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
	mutex_exit(&vd->vdev_dtl_lock);

	for (c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done);
		mutex_enter(&vd->vdev_dtl_lock);
		space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map);
		space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub);
		mutex_exit(&vd->vdev_dtl_lock);
	}
}
|
975 |
|
/*
 * Load a leaf vdev's DTL from disk: refresh the space_map_obj_t from
 * the DTL object's bonus buffer, then load the allocated segments
 * into vdev_dtl_map.  A vdev with no DTL object (smo_object == 0)
 * trivially succeeds.  Returns 0 or the error from space_map_load().
 */
static int
vdev_dtl_load(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;
	space_map_obj_t *smo = &vd->vdev_dtl;
	dmu_buf_t *db;
	int error;

	/* Only leaf vdevs carry a DTL object. */
	ASSERT(vd->vdev_children == 0);

	if (smo->smo_object == 0)
		return (0);

	/* The space_map_obj_t lives in the object's bonus buffer. */
	db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object);
	dmu_buf_read(db);
	ASSERT3U(db->db_size, ==, sizeof (*smo));
	bcopy(db->db_data, smo, db->db_size);
	dmu_buf_rele(db);

	mutex_enter(&vd->vdev_dtl_lock);
	error = space_map_load(&vd->vdev_dtl_map, smo, SM_ALLOC,
	    spa->spa_meta_objset, smo->smo_objsize, smo->smo_alloc);
	mutex_exit(&vd->vdev_dtl_lock);

	return (error);
}
|
1002 |
|
/*
 * Write a leaf vdev's DTL to disk for 'txg'.
 *
 * If the vdev is being detached, its DTL object is freed instead.
 * Otherwise the on-disk object is (re)created if needed, its old
 * contents are freed, a private snapshot of the in-core DTL is taken
 * under the DTL lock, and that snapshot is synced out followed by the
 * updated space_map_obj_t in the object's bonus buffer.  All object
 * updates happen in a transaction assigned to 'txg'.
 */
void
vdev_dtl_sync(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	space_map_obj_t *smo = &vd->vdev_dtl;
	space_map_t *sm = &vd->vdev_dtl_map;
	space_map_t smsync;	/* private copy synced to disk */
	kmutex_t smlock;	/* lock for smsync only */
	avl_tree_t *t = &sm->sm_root;
	space_seg_t *ss;
	dmu_buf_t *db;
	dmu_tx_t *tx;

	dprintf("%s in txg %llu pass %d\n",
	    vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa));

	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);

	/* A detached vdev needs no DTL: free the object and bail. */
	if (vd->vdev_detached) {
		if (smo->smo_object != 0) {
			int err = dmu_object_free(spa->spa_meta_objset,
			    smo->smo_object, tx);
			ASSERT3U(err, ==, 0);
			smo->smo_object = 0;
		}
		dmu_tx_commit(tx);
		return;
	}

	/* First sync: create the on-disk space map object. */
	if (smo->smo_object == 0) {
		ASSERT(smo->smo_objsize == 0);
		ASSERT(smo->smo_alloc == 0);
		smo->smo_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
		    DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
		ASSERT(smo->smo_object != 0);
		/* New object number must be recorded in the config. */
		vdev_config_dirty(vd->vdev_top);
	}

	/* Rewrite the object from scratch each sync. */
	dmu_free_range(spa->spa_meta_objset, smo->smo_object,
	    0, smo->smo_objsize, tx);

	mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL);

	space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift,
	    &smlock);

	mutex_enter(&smlock);

	/*
	 * Snapshot the live DTL into smsync while holding the DTL lock,
	 * so the sync below can proceed without blocking DTL updates.
	 */
	mutex_enter(&vd->vdev_dtl_lock);
	for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss))
		space_map_add(&smsync, ss->ss_start, ss->ss_end - ss->ss_start);
	mutex_exit(&vd->vdev_dtl_lock);

	smo->smo_objsize = 0;
	smo->smo_alloc = smsync.sm_space;

	space_map_sync(&smsync, NULL, smo, SM_ALLOC, spa->spa_meta_objset, tx);
	space_map_destroy(&smsync);

	mutex_exit(&smlock);
	mutex_destroy(&smlock);

	/* Persist the updated smo in the object's bonus buffer. */
	db = dmu_bonus_hold(spa->spa_meta_objset, smo->smo_object);
	dmu_buf_will_dirty(db, tx);
	ASSERT3U(db->db_size, ==, sizeof (*smo));
	bcopy(smo, db->db_data, db->db_size);
	dmu_buf_rele(db);

	dmu_tx_commit(tx);
}
|
1074 |
|
1075 int |
|
1076 vdev_load(vdev_t *vd, int import) |
|
1077 { |
|
1078 spa_t *spa = vd->vdev_spa; |
|
1079 int c, error; |
|
1080 nvlist_t *label; |
|
1081 uint64_t guid, state; |
|
1082 |
|
1083 dprintf("loading %s\n", vdev_description(vd)); |
|
1084 |
|
1085 /* |
|
1086 * Recursively load all children. |
|
1087 */ |
|
1088 for (c = 0; c < vd->vdev_children; c++) |
|
1089 if ((error = vdev_load(vd->vdev_child[c], import)) != 0) |
|
1090 return (error); |
|
1091 |
|
1092 /* |
|
1093 * If this is a leaf vdev, make sure its agrees with its disk labels. |
|
1094 */ |
|
1095 if (vd->vdev_ops->vdev_op_leaf) { |
|
1096 |
|
1097 if (vdev_is_dead(vd)) |
|
1098 return (0); |
|
1099 |
|
1100 /* |
|
1101 * XXX state transitions don't propagate to parent here. |
|
1102 * Also, merely setting the state isn't sufficient because |
|
1103 * it's not persistent; a vdev_reopen() would make us |
|
1104 * forget all about it. |
|
1105 */ |
|
1106 if ((label = vdev_label_read_config(vd)) == NULL) { |
|
1107 dprintf("can't load label config\n"); |
|
1108 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1109 VDEV_AUX_CORRUPT_DATA); |
|
1110 return (0); |
|
1111 } |
|
1112 |
|
1113 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, |
|
1114 &guid) != 0 || guid != spa_guid(spa)) { |
|
1115 dprintf("bad or missing pool GUID (%llu)\n", guid); |
|
1116 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1117 VDEV_AUX_CORRUPT_DATA); |
|
1118 nvlist_free(label); |
|
1119 return (0); |
|
1120 } |
|
1121 |
|
1122 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) || |
|
1123 guid != vd->vdev_guid) { |
|
1124 dprintf("bad or missing vdev guid (%llu != %llu)\n", |
|
1125 guid, vd->vdev_guid); |
|
1126 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1127 VDEV_AUX_CORRUPT_DATA); |
|
1128 nvlist_free(label); |
|
1129 return (0); |
|
1130 } |
|
1131 |
|
1132 /* |
|
1133 * If we find a vdev with a matching pool guid and vdev guid, |
|
1134 * but the pool state is not active, it indicates that the user |
|
1135 * exported or destroyed the pool without affecting the config |
|
1136 * cache (if / was mounted readonly, for example). In this |
|
1137 * case, immediately return EBADF so the caller can remove it |
|
1138 * from the config. |
|
1139 */ |
|
1140 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, |
|
1141 &state)) { |
|
1142 dprintf("missing pool state\n"); |
|
1143 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1144 VDEV_AUX_CORRUPT_DATA); |
|
1145 nvlist_free(label); |
|
1146 return (0); |
|
1147 } |
|
1148 |
|
1149 if (state != POOL_STATE_ACTIVE && |
|
1150 (!import || state != POOL_STATE_EXPORTED)) { |
|
1151 dprintf("pool state not active (%llu)\n", state); |
|
1152 nvlist_free(label); |
|
1153 return (EBADF); |
|
1154 } |
|
1155 |
|
1156 nvlist_free(label); |
|
1157 } |
|
1158 |
|
1159 /* |
|
1160 * If this is a top-level vdev, make sure its allocation parameters |
|
1161 * exist and initialize its metaslabs. |
|
1162 */ |
|
1163 if (vd == vd->vdev_top) { |
|
1164 |
|
1165 if (vd->vdev_ms_array == 0 || |
|
1166 vd->vdev_ms_shift == 0 || |
|
1167 vd->vdev_ashift == 0 || |
|
1168 vd->vdev_asize == 0) { |
|
1169 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1170 VDEV_AUX_CORRUPT_DATA); |
|
1171 return (0); |
|
1172 } |
|
1173 |
|
1174 vdev_metaslab_init(vd, 0); |
|
1175 } |
|
1176 |
|
1177 /* |
|
1178 * If this is a leaf vdev, load its DTL. |
|
1179 */ |
|
1180 if (vd->vdev_ops->vdev_op_leaf) { |
|
1181 error = vdev_dtl_load(vd); |
|
1182 if (error) { |
|
1183 dprintf("can't load DTL for %s, error %d\n", |
|
1184 vdev_description(vd), error); |
|
1185 vdev_set_state(vd, VDEV_STATE_CANT_OPEN, |
|
1186 VDEV_AUX_CORRUPT_DATA); |
|
1187 return (0); |
|
1188 } |
|
1189 } |
|
1190 |
|
1191 return (0); |
|
1192 } |
|
1193 |
|
1194 void |
|
1195 vdev_sync_done(vdev_t *vd, uint64_t txg) |
|
1196 { |
|
1197 metaslab_t *msp; |
|
1198 |
|
1199 dprintf("%s txg %llu\n", vdev_description(vd), txg); |
|
1200 |
|
1201 while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) |
|
1202 metaslab_sync_done(msp, txg); |
|
1203 } |
|
1204 |
|
1205 void |
|
1206 vdev_add_sync(vdev_t *vd, uint64_t txg) |
|
1207 { |
|
1208 spa_t *spa = vd->vdev_spa; |
|
1209 dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); |
|
1210 |
|
1211 ASSERT(vd == vd->vdev_top); |
|
1212 |
|
1213 if (vd->vdev_ms_array == 0) |
|
1214 vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, |
|
1215 DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); |
|
1216 |
|
1217 ASSERT(vd->vdev_ms_array != 0); |
|
1218 |
|
1219 vdev_config_dirty(vd); |
|
1220 |
|
1221 dmu_tx_commit(tx); |
|
1222 } |
|
1223 |
|
1224 void |
|
1225 vdev_sync(vdev_t *vd, uint64_t txg) |
|
1226 { |
|
1227 spa_t *spa = vd->vdev_spa; |
|
1228 vdev_t *lvd; |
|
1229 metaslab_t *msp; |
|
1230 uint8_t *dirtyp = &vd->vdev_dirty[txg & TXG_MASK]; |
|
1231 uint8_t dirty = *dirtyp; |
|
1232 |
|
1233 mutex_enter(&vd->vdev_dirty_lock); |
|
1234 *dirtyp &= ~(VDD_ALLOC | VDD_FREE | VDD_ADD | VDD_DTL); |
|
1235 mutex_exit(&vd->vdev_dirty_lock); |
|
1236 |
|
1237 dprintf("%s txg %llu pass %d\n", |
|
1238 vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); |
|
1239 |
|
1240 if (dirty & VDD_ADD) |
|
1241 vdev_add_sync(vd, txg); |
|
1242 |
|
1243 while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) |
|
1244 metaslab_sync(msp, txg); |
|
1245 |
|
1246 while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) |
|
1247 vdev_dtl_sync(lvd, txg); |
|
1248 |
|
1249 (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); |
|
1250 } |
|
1251 |
|
1252 uint64_t |
|
1253 vdev_psize_to_asize(vdev_t *vd, uint64_t psize) |
|
1254 { |
|
1255 return (vd->vdev_ops->vdev_op_asize(vd, psize)); |
|
1256 } |
|
1257 |
|
1258 void |
|
1259 vdev_io_start(zio_t *zio) |
|
1260 { |
|
1261 zio->io_vd->vdev_ops->vdev_op_io_start(zio); |
|
1262 } |
|
1263 |
|
1264 void |
|
1265 vdev_io_done(zio_t *zio) |
|
1266 { |
|
1267 zio->io_vd->vdev_ops->vdev_op_io_done(zio); |
|
1268 } |
|
1269 |
|
1270 const char * |
|
1271 vdev_description(vdev_t *vd) |
|
1272 { |
|
1273 if (vd == NULL || vd->vdev_ops == NULL) |
|
1274 return ("<unknown>"); |
|
1275 |
|
1276 if (vd->vdev_path != NULL) |
|
1277 return (vd->vdev_path); |
|
1278 |
|
1279 if (vd->vdev_parent == NULL) |
|
1280 return (spa_name(vd->vdev_spa)); |
|
1281 |
|
1282 return (vd->vdev_ops->vdev_op_type); |
|
1283 } |
|
1284 |
|
/*
 * Bring the vdev with the given path back online: clear its offline flag
 * and error counts, reopen its top-level vdev, and start a resilver.
 * Returns 0 on success or ENODEV if no vdev matches the path.
 */
int
vdev_online(spa_t *spa, const char *path)
{
	vdev_t *vd;

	spa_config_enter(spa, RW_WRITER);

	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) {
		spa_config_exit(spa);
		return (ENODEV);
	}

	dprintf("ONLINE: %s\n", vdev_description(vd));

	vd->vdev_offline = B_FALSE;

	/*
	 * Clear the error counts.  The idea is that you expect to see all
	 * zeroes when everything is working, so if you've just onlined a
	 * device, you don't want to keep hearing about errors from before.
	 */
	vd->vdev_stat.vs_read_errors = 0;
	vd->vdev_stat.vs_write_errors = 0;
	vd->vdev_stat.vs_checksum_errors = 0;

	/* Reopen the whole top-level vdev so its state reflects the change. */
	vdev_reopen(vd->vdev_top, NULL);

	spa_config_exit(spa);

	/* Resilver to catch up on anything written while the device was out. */
	VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);

	return (0);
}
|
1318 |
|
/*
 * Take the vdev with the given path offline.  Refused (EBUSY) when the
 * top-level vdev has outstanding DTL entries or when offlining this
 * device would render the top-level vdev unusable.  Returns 0 on
 * success or ENODEV if no vdev matches the path.
 */
int
vdev_offline(spa_t *spa, const char *path)
{
	vdev_t *vd;

	spa_config_enter(spa, RW_WRITER);

	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) {
		spa_config_exit(spa);
		return (ENODEV);
	}

	dprintf("OFFLINE: %s\n", vdev_description(vd));

	/*
	 * If this device's top-level vdev has a non-empty DTL,
	 * don't allow the device to be offlined.
	 *
	 * XXX -- we should make this more precise by allowing the offline
	 * as long as the remaining devices don't have any DTL holes.
	 */
	if (vd->vdev_top->vdev_dtl_map.sm_space != 0) {
		spa_config_exit(spa);
		return (EBUSY);
	}

	/*
	 * Set this device to offline state and reopen its top-level vdev.
	 * If this action results in the top-level vdev becoming unusable,
	 * undo it and fail the request.
	 */
	vd->vdev_offline = B_TRUE;
	vdev_reopen(vd->vdev_top, NULL);
	if (vdev_is_dead(vd->vdev_top)) {
		/* Roll back: bring the device online again. */
		vd->vdev_offline = B_FALSE;
		vdev_reopen(vd->vdev_top, NULL);
		spa_config_exit(spa);
		return (EBUSY);
	}

	spa_config_exit(spa);

	return (0);
}
|
1363 |
|
1364 int |
|
1365 vdev_error_setup(spa_t *spa, const char *path, int mode, int mask, uint64_t arg) |
|
1366 { |
|
1367 vdev_t *vd; |
|
1368 |
|
1369 spa_config_enter(spa, RW_WRITER); |
|
1370 |
|
1371 if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, path)) == NULL) { |
|
1372 spa_config_exit(spa); |
|
1373 return (ENODEV); |
|
1374 } |
|
1375 |
|
1376 vd->vdev_fault_mode = mode; |
|
1377 vd->vdev_fault_mask = mask; |
|
1378 vd->vdev_fault_arg = arg; |
|
1379 |
|
1380 spa_config_exit(spa); |
|
1381 |
|
1382 return (0); |
|
1383 } |
|
1384 |
|
1385 int |
|
1386 vdev_is_dead(vdev_t *vd) |
|
1387 { |
|
1388 return (vd->vdev_state <= VDEV_STATE_CANT_OPEN); |
|
1389 } |
|
1390 |
|
1391 int |
|
1392 vdev_error_inject(vdev_t *vd, zio_t *zio) |
|
1393 { |
|
1394 int error = 0; |
|
1395 |
|
1396 if (vd->vdev_fault_mode == VDEV_FAULT_NONE) |
|
1397 return (0); |
|
1398 |
|
1399 if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0) |
|
1400 return (0); |
|
1401 |
|
1402 switch (vd->vdev_fault_mode) { |
|
1403 case VDEV_FAULT_RANDOM: |
|
1404 if (spa_get_random(vd->vdev_fault_arg) == 0) |
|
1405 error = EIO; |
|
1406 break; |
|
1407 |
|
1408 case VDEV_FAULT_COUNT: |
|
1409 if ((int64_t)--vd->vdev_fault_arg <= 0) |
|
1410 vd->vdev_fault_mode = VDEV_FAULT_NONE; |
|
1411 error = EIO; |
|
1412 break; |
|
1413 } |
|
1414 |
|
1415 if (error != 0) { |
|
1416 dprintf("returning %d for type %d on %s state %d offset %llx\n", |
|
1417 error, zio->io_type, vdev_description(vd), |
|
1418 vd->vdev_state, zio->io_offset); |
|
1419 } |
|
1420 |
|
1421 return (error); |
|
1422 } |
|
1423 |
|
/*
 * Get statistics for the given vdev.
 */
void
vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
{
	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
	int c, t;

	/* Snapshot this vdev's stats and convert the timestamp to an age. */
	mutex_enter(&vd->vdev_stat_lock);
	bcopy(&vd->vdev_stat, vs, sizeof (*vs));
	vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
	vs->vs_state = vd->vdev_state;
	mutex_exit(&vd->vdev_stat_lock);

	/*
	 * If we're getting stats on the root vdev, aggregate the I/O counts
	 * over all top-level vdevs (i.e. the direct children of the root).
	 *
	 * NOTE(review): the children's vs_* counters are read while holding
	 * only the root vdev's stat lock, not each child's — presumably a
	 * deliberate tolerance of slightly stale reads; confirm before
	 * tightening.
	 */
	if (vd == rvd) {
		for (c = 0; c < rvd->vdev_children; c++) {
			vdev_t *cvd = rvd->vdev_child[c];
			vdev_stat_t *cvs = &cvd->vdev_stat;

			mutex_enter(&vd->vdev_stat_lock);
			for (t = 0; t < ZIO_TYPES; t++) {
				vs->vs_ops[t] += cvs->vs_ops[t];
				vs->vs_bytes[t] += cvs->vs_bytes[t];
			}
			vs->vs_read_errors += cvs->vs_read_errors;
			vs->vs_write_errors += cvs->vs_write_errors;
			vs->vs_checksum_errors += cvs->vs_checksum_errors;
			vs->vs_scrub_examined += cvs->vs_scrub_examined;
			vs->vs_scrub_errors += cvs->vs_scrub_errors;
			mutex_exit(&vd->vdev_stat_lock);
		}
	}
}
|
1462 |
|
/*
 * Update a vdev's I/O statistics in response to a completed zio, and on
 * write errors record the txg in the DTLs so the data can be resilvered.
 */
void
vdev_stat_update(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_t *pvd;
	uint64_t txg = zio->io_txg;
	vdev_stat_t *vs = &vd->vdev_stat;
	zio_type_t type = zio->io_type;
	int flags = zio->io_flags;

	if (zio->io_error == 0) {
		/* Successful I/O: count ops/bytes unless it was bypassed. */
		if (!(flags & ZIO_FLAG_IO_BYPASS)) {
			mutex_enter(&vd->vdev_stat_lock);
			vs->vs_ops[type]++;
			vs->vs_bytes[type] += zio->io_size;
			mutex_exit(&vd->vdev_stat_lock);
		}
		/*
		 * Credit repair I/O (only at the leaf, i.e. when there are
		 * no delegated children) to either the scrub/resilver or
		 * the self-healing counter.
		 */
		if ((flags & ZIO_FLAG_IO_REPAIR) &&
		    zio->io_delegate_list == NULL) {
			mutex_enter(&vd->vdev_stat_lock);
			if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))
				vs->vs_scrub_repaired += zio->io_size;
			else
				vs->vs_self_healed += zio->io_size;
			mutex_exit(&vd->vdev_stat_lock);
		}
		return;
	}

	/* Speculative (prefetch) failures don't count against the device. */
	if (flags & ZIO_FLAG_SPECULATIVE)
		return;

	/* Count the error against the device unless it's already dead. */
	if (!vdev_is_dead(vd)) {
		mutex_enter(&vd->vdev_stat_lock);
		if (type == ZIO_TYPE_READ) {
			if (zio->io_error == ECKSUM)
				vs->vs_checksum_errors++;
			else
				vs->vs_read_errors++;
		}
		if (type == ZIO_TYPE_WRITE)
			vs->vs_write_errors++;
		mutex_exit(&vd->vdev_stat_lock);
	}

	if (type == ZIO_TYPE_WRITE) {
		/* DTL updates apply only to leaf vdevs with a real txg. */
		if (txg == 0 || vd->vdev_children != 0)
			return;
		if (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)) {
			/* Failed repair write: mark txg in the scrub DTLs. */
			ASSERT(flags & ZIO_FLAG_IO_REPAIR);
			for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
				vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1);
		}
		if (!(flags & ZIO_FLAG_IO_REPAIR)) {
			vdev_t *tvd = vd->vdev_top;
			/* Already in the DTL: nothing more to record. */
			if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1))
				return;
			/* Queue the leaf's DTL for sync in this txg. */
			vdev_dirty(tvd, VDD_DTL, txg);
			(void) txg_list_add(&tvd->vdev_dtl_list, vd, txg);
			for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
				vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1);
		}
	}
}
|
1527 |
|
1528 void |
|
1529 vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete) |
|
1530 { |
|
1531 int c; |
|
1532 vdev_stat_t *vs = &vd->vdev_stat; |
|
1533 |
|
1534 for (c = 0; c < vd->vdev_children; c++) |
|
1535 vdev_scrub_stat_update(vd->vdev_child[c], type, complete); |
|
1536 |
|
1537 mutex_enter(&vd->vdev_stat_lock); |
|
1538 |
|
1539 if (type == POOL_SCRUB_NONE) { |
|
1540 /* |
|
1541 * Update completion and end time. Leave everything else alone |
|
1542 * so we can report what happened during the previous scrub. |
|
1543 */ |
|
1544 vs->vs_scrub_complete = complete; |
|
1545 vs->vs_scrub_end = gethrestime_sec(); |
|
1546 } else { |
|
1547 vs->vs_scrub_type = type; |
|
1548 vs->vs_scrub_complete = 0; |
|
1549 vs->vs_scrub_examined = 0; |
|
1550 vs->vs_scrub_repaired = 0; |
|
1551 vs->vs_scrub_errors = 0; |
|
1552 vs->vs_scrub_start = gethrestime_sec(); |
|
1553 vs->vs_scrub_end = 0; |
|
1554 } |
|
1555 |
|
1556 mutex_exit(&vd->vdev_stat_lock); |
|
1557 } |
|
1558 |
|
1559 /* |
|
1560 * Report checksum errors that a vdev that didn't realize it made. |
|
1561 * This can happen, for example, when RAID-Z combinatorial reconstruction |
|
1562 * infers that one of its components returned bad data. |
|
1563 */ |
|
1564 void |
|
1565 vdev_checksum_error(zio_t *zio, vdev_t *vd) |
|
1566 { |
|
1567 dprintf_bp(zio->io_bp, "imputed checksum error on %s: ", |
|
1568 vdev_description(vd)); |
|
1569 |
|
1570 if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { |
|
1571 mutex_enter(&vd->vdev_stat_lock); |
|
1572 vd->vdev_stat.vs_checksum_errors++; |
|
1573 mutex_exit(&vd->vdev_stat_lock); |
|
1574 } |
|
1575 } |
|
1576 |
|
1577 /* |
|
1578 * Update the in-core space usage stats for this vdev and the root vdev. |
|
1579 */ |
|
1580 void |
|
1581 vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta) |
|
1582 { |
|
1583 ASSERT(vd == vd->vdev_top); |
|
1584 |
|
1585 do { |
|
1586 mutex_enter(&vd->vdev_stat_lock); |
|
1587 vd->vdev_stat.vs_space += space_delta; |
|
1588 vd->vdev_stat.vs_alloc += alloc_delta; |
|
1589 mutex_exit(&vd->vdev_stat_lock); |
|
1590 } while ((vd = vd->vdev_parent) != NULL); |
|
1591 } |
|
1592 |
|
/*
 * Various knobs to tune a vdev.
 *
 * NOTE(review): each entry appears to be (name, description, min, max,
 * default, struct offset of the tuned field) — confirm against the
 * vdev_knob_t declaration before relying on the field order.
 */
static vdev_knob_t vdev_knob[] = {
	{
		"cache_size",
		"size of the read-ahead cache",
		0,
		1ULL << 30,
		10ULL << 20,
		offsetof(struct vdev, vdev_cache.vc_size)
	},
	{
		"cache_bshift",
		"log2 of cache blocksize",
		SPA_MINBLOCKSHIFT,
		SPA_MAXBLOCKSHIFT,
		16,
		offsetof(struct vdev, vdev_cache.vc_bshift)
	},
	{
		"cache_max",
		"largest block size to cache",
		0,
		SPA_MAXBLOCKSIZE,
		1ULL << 14,
		offsetof(struct vdev, vdev_cache.vc_max)
	},
	{
		"min_pending",
		"minimum pending I/Os to the disk",
		1,
		10000,
		2,
		offsetof(struct vdev, vdev_queue.vq_min_pending)
	},
	{
		"max_pending",
		"maximum pending I/Os to the disk",
		1,
		10000,
		35,
		offsetof(struct vdev, vdev_queue.vq_max_pending)
	},
	{
		"agg_limit",
		"maximum size of aggregated I/Os",
		0,
		SPA_MAXBLOCKSIZE,
		SPA_MAXBLOCKSIZE,
		offsetof(struct vdev, vdev_queue.vq_agg_limit)
	},
	{
		"time_shift",
		"deadline = pri + (lbolt >> time_shift)",
		0,
		63,
		4,
		offsetof(struct vdev, vdev_queue.vq_time_shift)
	},
	{
		"ramp_rate",
		"exponential I/O issue ramp-up rate",
		1,
		10000,
		2,
		offsetof(struct vdev, vdev_queue.vq_ramp_rate)
	},
};
|
1662 |
|
1663 vdev_knob_t * |
|
1664 vdev_knob_next(vdev_knob_t *vk) |
|
1665 { |
|
1666 if (vk == NULL) |
|
1667 return (vdev_knob); |
|
1668 |
|
1669 if (++vk == vdev_knob + sizeof (vdev_knob) / sizeof (vdev_knob_t)) |
|
1670 return (NULL); |
|
1671 |
|
1672 return (vk); |
|
1673 } |
|
1674 |
|
1675 /* |
|
1676 * Mark a top-level vdev's config as dirty, placing it on the dirty list |
|
1677 * so that it will be written out next time the vdev configuration is synced. |
|
1678 * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. |
|
1679 */ |
|
1680 void |
|
1681 vdev_config_dirty(vdev_t *vd) |
|
1682 { |
|
1683 spa_t *spa = vd->vdev_spa; |
|
1684 vdev_t *rvd = spa->spa_root_vdev; |
|
1685 int c; |
|
1686 |
|
1687 if (vd == rvd) { |
|
1688 for (c = 0; c < rvd->vdev_children; c++) |
|
1689 vdev_config_dirty(rvd->vdev_child[c]); |
|
1690 } else { |
|
1691 ASSERT(vd == vd->vdev_top); |
|
1692 |
|
1693 if (!vd->vdev_is_dirty) { |
|
1694 list_insert_head(&spa->spa_dirty_list, vd); |
|
1695 vd->vdev_is_dirty = B_TRUE; |
|
1696 } |
|
1697 } |
|
1698 } |
|
1699 |
|
1700 void |
|
1701 vdev_config_clean(vdev_t *vd) |
|
1702 { |
|
1703 ASSERT(vd->vdev_is_dirty); |
|
1704 |
|
1705 list_remove(&vd->vdev_spa->spa_dirty_list, vd); |
|
1706 vd->vdev_is_dirty = B_FALSE; |
|
1707 } |
|
1708 |
|
1709 /* |
|
1710 * Set a vdev's state, updating any parent's state as well. |
|
1711 */ |
|
1712 void |
|
1713 vdev_set_state(vdev_t *vd, vdev_state_t state, vdev_aux_t aux) |
|
1714 { |
|
1715 if (state == vd->vdev_state) |
|
1716 return; |
|
1717 |
|
1718 vd->vdev_state = state; |
|
1719 vd->vdev_stat.vs_aux = aux; |
|
1720 |
|
1721 if (vd->vdev_parent != NULL) { |
|
1722 int c; |
|
1723 int degraded = 0, faulted = 0; |
|
1724 vdev_t *parent, *child; |
|
1725 |
|
1726 parent = vd->vdev_parent; |
|
1727 for (c = 0; c < parent->vdev_children; c++) { |
|
1728 child = parent->vdev_child[c]; |
|
1729 if (child->vdev_state <= VDEV_STATE_CANT_OPEN) |
|
1730 faulted++; |
|
1731 else if (child->vdev_state == VDEV_STATE_DEGRADED) |
|
1732 degraded++; |
|
1733 } |
|
1734 |
|
1735 vd->vdev_parent->vdev_ops->vdev_op_state_change( |
|
1736 vd->vdev_parent, faulted, degraded); |
|
1737 } |
|
1738 } |