author | ek110237 |
Wed, 25 Jun 2008 15:48:48 -0700 | |
changeset 6959 | f223e134ee61 |
parent 6643 | 3a34b0dbb107 |
child 6976 | cae5f06df471 |
permissions | -rw-r--r-- |
789 | 1 |
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
#include <sys/zfs_context.h> |
|
1544 | 30 |
#include <sys/fm/fs/zfs.h> |
789 | 31 |
#include <sys/spa.h> |
32 |
#include <sys/spa_impl.h> |
|
33 |
#include <sys/dmu.h> |
|
34 |
#include <sys/dmu_tx.h> |
|
35 |
#include <sys/vdev_impl.h> |
|
36 |
#include <sys/uberblock_impl.h> |
|
37 |
#include <sys/metaslab.h> |
|
38 |
#include <sys/metaslab_impl.h> |
|
39 |
#include <sys/space_map.h> |
|
40 |
#include <sys/zio.h> |
|
41 |
#include <sys/zap.h> |
|
42 |
#include <sys/fs/zfs.h> |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
43 |
#include <sys/arc.h> |
789 | 44 |
|
45 |
/* |
|
46 |
* Virtual device management. |
|
47 |
*/ |
|
48 |
||
49 |
static vdev_ops_t *vdev_ops_table[] = { |
|
50 |
&vdev_root_ops, |
|
51 |
&vdev_raidz_ops, |
|
52 |
&vdev_mirror_ops, |
|
53 |
&vdev_replacing_ops, |
|
2082 | 54 |
&vdev_spare_ops, |
789 | 55 |
&vdev_disk_ops, |
56 |
&vdev_file_ops, |
|
57 |
&vdev_missing_ops, |
|
58 |
NULL |
|
59 |
}; |
|
60 |
||
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
61 |
/* maximum scrub/resilver I/O queue */ |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
62 |
int zfs_scrub_limit = 70; |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
63 |
|
789 | 64 |
/* |
65 |
* Given a vdev type, return the appropriate ops vector. |
|
66 |
*/ |
|
67 |
static vdev_ops_t * |
|
68 |
vdev_getops(const char *type) |
|
69 |
{ |
|
70 |
vdev_ops_t *ops, **opspp; |
|
71 |
||
72 |
for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) |
|
73 |
if (strcmp(ops->vdev_op_type, type) == 0) |
|
74 |
break; |
|
75 |
||
76 |
return (ops); |
|
77 |
} |
|
78 |
||
79 |
/* |
|
80 |
* Default asize function: return the MAX of psize with the asize of |
|
81 |
* all children. This is what's used by anything other than RAID-Z. |
|
82 |
*/ |
|
83 |
uint64_t |
|
84 |
vdev_default_asize(vdev_t *vd, uint64_t psize) |
|
85 |
{ |
|
1732 | 86 |
uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift); |
789 | 87 |
uint64_t csize; |
88 |
uint64_t c; |
|
89 |
||
90 |
for (c = 0; c < vd->vdev_children; c++) { |
|
91 |
csize = vdev_psize_to_asize(vd->vdev_child[c], psize); |
|
92 |
asize = MAX(asize, csize); |
|
93 |
} |
|
94 |
||
95 |
return (asize); |
|
96 |
} |
|
97 |
||
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
98 |
/* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
99 |
* Get the replaceable or attachable device size. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
100 |
* If the parent is a mirror or raidz, the replaceable size is the minimum |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
101 |
* psize of all its children. For the rest, just return our own psize. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
102 |
* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
103 |
* e.g. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
104 |
* psize rsize |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
105 |
* root - - |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
106 |
* mirror/raidz - - |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
107 |
* disk1 20g 20g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
108 |
* disk2 40g 20g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
109 |
* disk3 80g 80g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
110 |
*/ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
111 |
uint64_t |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
112 |
vdev_get_rsize(vdev_t *vd) |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
113 |
{ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
114 |
vdev_t *pvd, *cvd; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
115 |
uint64_t c, rsize; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
116 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
117 |
pvd = vd->vdev_parent; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
118 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
119 |
/* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
120 |
* If our parent is NULL or the root, just return our own psize. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
121 |
*/ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
122 |
if (pvd == NULL || pvd->vdev_parent == NULL) |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
123 |
return (vd->vdev_psize); |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
124 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
125 |
rsize = 0; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
126 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
127 |
for (c = 0; c < pvd->vdev_children; c++) { |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
128 |
cvd = pvd->vdev_child[c]; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
129 |
rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
130 |
} |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
131 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
132 |
return (rsize); |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
133 |
} |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
134 |
|
789 | 135 |
vdev_t * |
136 |
vdev_lookup_top(spa_t *spa, uint64_t vdev) |
|
137 |
{ |
|
138 |
vdev_t *rvd = spa->spa_root_vdev; |
|
139 |
||
5530 | 140 |
ASSERT(spa_config_held(spa, RW_READER) || |
141 |
curthread == spa->spa_scrub_thread); |
|
142 |
||
789 | 143 |
if (vdev < rvd->vdev_children) |
144 |
return (rvd->vdev_child[vdev]); |
|
145 |
||
146 |
return (NULL); |
|
147 |
} |
|
148 |
||
149 |
vdev_t * |
|
150 |
vdev_lookup_by_guid(vdev_t *vd, uint64_t guid) |
|
151 |
{ |
|
152 |
int c; |
|
153 |
vdev_t *mvd; |
|
154 |
||
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
155 |
if (vd->vdev_guid == guid) |
789 | 156 |
return (vd); |
157 |
||
158 |
for (c = 0; c < vd->vdev_children; c++) |
|
159 |
if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) != |
|
160 |
NULL) |
|
161 |
return (mvd); |
|
162 |
||
163 |
return (NULL); |
|
164 |
} |
|
165 |
||
166 |
void |
|
167 |
vdev_add_child(vdev_t *pvd, vdev_t *cvd) |
|
168 |
{ |
|
169 |
size_t oldsize, newsize; |
|
170 |
uint64_t id = cvd->vdev_id; |
|
171 |
vdev_t **newchild; |
|
172 |
||
173 |
ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); |
|
174 |
ASSERT(cvd->vdev_parent == NULL); |
|
175 |
||
176 |
cvd->vdev_parent = pvd; |
|
177 |
||
178 |
if (pvd == NULL) |
|
179 |
return; |
|
180 |
||
181 |
ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL); |
|
182 |
||
183 |
oldsize = pvd->vdev_children * sizeof (vdev_t *); |
|
184 |
pvd->vdev_children = MAX(pvd->vdev_children, id + 1); |
|
185 |
newsize = pvd->vdev_children * sizeof (vdev_t *); |
|
186 |
||
187 |
newchild = kmem_zalloc(newsize, KM_SLEEP); |
|
188 |
if (pvd->vdev_child != NULL) { |
|
189 |
bcopy(pvd->vdev_child, newchild, oldsize); |
|
190 |
kmem_free(pvd->vdev_child, oldsize); |
|
191 |
} |
|
192 |
||
193 |
pvd->vdev_child = newchild; |
|
194 |
pvd->vdev_child[id] = cvd; |
|
195 |
||
196 |
cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); |
|
197 |
ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); |
|
198 |
||
199 |
/* |
|
200 |
* Walk up all ancestors to update guid sum. |
|
201 |
*/ |
|
202 |
for (; pvd != NULL; pvd = pvd->vdev_parent) |
|
203 |
pvd->vdev_guid_sum += cvd->vdev_guid_sum; |
|
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
204 |
|
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
205 |
if (cvd->vdev_ops->vdev_op_leaf) |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
206 |
cvd->vdev_spa->spa_scrub_maxinflight += zfs_scrub_limit; |
789 | 207 |
} |
208 |
||
209 |
void |
|
210 |
vdev_remove_child(vdev_t *pvd, vdev_t *cvd) |
|
211 |
{ |
|
212 |
int c; |
|
213 |
uint_t id = cvd->vdev_id; |
|
214 |
||
215 |
ASSERT(cvd->vdev_parent == pvd); |
|
216 |
||
217 |
if (pvd == NULL) |
|
218 |
return; |
|
219 |
||
220 |
ASSERT(id < pvd->vdev_children); |
|
221 |
ASSERT(pvd->vdev_child[id] == cvd); |
|
222 |
||
223 |
pvd->vdev_child[id] = NULL; |
|
224 |
cvd->vdev_parent = NULL; |
|
225 |
||
226 |
for (c = 0; c < pvd->vdev_children; c++) |
|
227 |
if (pvd->vdev_child[c]) |
|
228 |
break; |
|
229 |
||
230 |
if (c == pvd->vdev_children) { |
|
231 |
kmem_free(pvd->vdev_child, c * sizeof (vdev_t *)); |
|
232 |
pvd->vdev_child = NULL; |
|
233 |
pvd->vdev_children = 0; |
|
234 |
} |
|
235 |
||
236 |
/* |
|
237 |
* Walk up all ancestors to update guid sum. |
|
238 |
*/ |
|
239 |
for (; pvd != NULL; pvd = pvd->vdev_parent) |
|
240 |
pvd->vdev_guid_sum -= cvd->vdev_guid_sum; |
|
3697
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
241 |
|
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
242 |
if (cvd->vdev_ops->vdev_op_leaf) |
5340a4d98e0b
6456888 zpool scrubbing leads to memory exhaustion and system hang
mishra
parents:
3377
diff
changeset
|
243 |
cvd->vdev_spa->spa_scrub_maxinflight -= zfs_scrub_limit; |
789 | 244 |
} |
245 |
||
246 |
/* |
|
247 |
* Remove any holes in the child array. |
|
248 |
*/ |
|
249 |
void |
|
250 |
vdev_compact_children(vdev_t *pvd) |
|
251 |
{ |
|
252 |
vdev_t **newchild, *cvd; |
|
253 |
int oldc = pvd->vdev_children; |
|
254 |
int newc, c; |
|
255 |
||
256 |
ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER)); |
|
257 |
||
258 |
for (c = newc = 0; c < oldc; c++) |
|
259 |
if (pvd->vdev_child[c]) |
|
260 |
newc++; |
|
261 |
||
262 |
newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP); |
|
263 |
||
264 |
for (c = newc = 0; c < oldc; c++) { |
|
265 |
if ((cvd = pvd->vdev_child[c]) != NULL) { |
|
266 |
newchild[newc] = cvd; |
|
267 |
cvd->vdev_id = newc++; |
|
268 |
} |
|
269 |
} |
|
270 |
||
271 |
kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *)); |
|
272 |
pvd->vdev_child = newchild; |
|
273 |
pvd->vdev_children = newc; |
|
274 |
} |
|
275 |
||
276 |
/* |
|
277 |
* Allocate and minimally initialize a vdev_t. |
|
278 |
*/ |
|
279 |
static vdev_t * |
|
280 |
vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) |
|
281 |
{ |
|
282 |
vdev_t *vd; |
|
283 |
||
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
284 |
vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
285 |
|
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
286 |
if (spa->spa_root_vdev == NULL) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
287 |
ASSERT(ops == &vdev_root_ops); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
288 |
spa->spa_root_vdev = vd; |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
289 |
} |
789 | 290 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
291 |
if (guid == 0) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
292 |
if (spa->spa_root_vdev == vd) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
293 |
/* |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
294 |
* The root vdev's guid will also be the pool guid, |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
295 |
* which must be unique among all pools. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
296 |
*/ |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
297 |
while (guid == 0 || spa_guid_exists(guid, 0)) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
298 |
guid = spa_get_random(-1ULL); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
299 |
} else { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
300 |
/* |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
301 |
* Any other vdev's guid must be unique within the pool. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
302 |
*/ |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
303 |
while (guid == 0 || |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
304 |
spa_guid_exists(spa_guid(spa), guid)) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
305 |
guid = spa_get_random(-1ULL); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
306 |
} |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
307 |
ASSERT(!spa_guid_exists(spa_guid(spa), guid)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
308 |
} |
789 | 309 |
|
310 |
vd->vdev_spa = spa; |
|
311 |
vd->vdev_id = id; |
|
312 |
vd->vdev_guid = guid; |
|
313 |
vd->vdev_guid_sum = guid; |
|
314 |
vd->vdev_ops = ops; |
|
315 |
vd->vdev_state = VDEV_STATE_CLOSED; |
|
316 |
||
317 |
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); |
|
2856 | 318 |
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL); |
789 | 319 |
space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock); |
320 |
space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock); |
|
321 |
txg_list_create(&vd->vdev_ms_list, |
|
322 |
offsetof(struct metaslab, ms_txg_node)); |
|
323 |
txg_list_create(&vd->vdev_dtl_list, |
|
324 |
offsetof(struct vdev, vdev_dtl_node)); |
|
325 |
vd->vdev_stat.vs_timestamp = gethrtime(); |
|
4451 | 326 |
vdev_queue_init(vd); |
327 |
vdev_cache_init(vd); |
|
789 | 328 |
|
329 |
return (vd); |
|
330 |
} |
|
331 |
||
332 |
/* |
|
333 |
* Allocate a new vdev. The 'alloctype' is used to control whether we are |
|
334 |
* creating a new vdev or loading an existing one - the behavior is slightly |
|
335 |
* different for each case. |
|
336 |
*/ |
|
2082 | 337 |
int |
338 |
vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, |
|
339 |
int alloctype) |
|
789 | 340 |
{ |
341 |
vdev_ops_t *ops; |
|
342 |
char *type; |
|
4527 | 343 |
uint64_t guid = 0, islog, nparity; |
789 | 344 |
vdev_t *vd; |
345 |
||
346 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
|
347 |
||
348 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) |
|
2082 | 349 |
return (EINVAL); |
789 | 350 |
|
351 |
if ((ops = vdev_getops(type)) == NULL) |
|
2082 | 352 |
return (EINVAL); |
789 | 353 |
|
354 |
/* |
|
355 |
* If this is a load, get the vdev guid from the nvlist. |
|
356 |
* Otherwise, vdev_alloc_common() will generate one for us. |
|
357 |
*/ |
|
358 |
if (alloctype == VDEV_ALLOC_LOAD) { |
|
359 |
uint64_t label_id; |
|
360 |
||
361 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || |
|
362 |
label_id != id) |
|
2082 | 363 |
return (EINVAL); |
789 | 364 |
|
365 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) |
|
2082 | 366 |
return (EINVAL); |
367 |
} else if (alloctype == VDEV_ALLOC_SPARE) { |
|
368 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) |
|
369 |
return (EINVAL); |
|
5450 | 370 |
} else if (alloctype == VDEV_ALLOC_L2CACHE) { |
371 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) |
|
372 |
return (EINVAL); |
|
789 | 373 |
} |
374 |
||
2082 | 375 |
/* |
376 |
* The first allocated vdev must be of type 'root'. |
|
377 |
*/ |
|
378 |
if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) |
|
379 |
return (EINVAL); |
|
380 |
||
4527 | 381 |
/* |
382 |
* Determine whether we're a log vdev. |
|
383 |
*/ |
|
384 |
islog = 0; |
|
385 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog); |
|
5094 | 386 |
if (islog && spa_version(spa) < SPA_VERSION_SLOGS) |
4527 | 387 |
return (ENOTSUP); |
388 |
||
389 |
/* |
|
390 |
* Set the nparity property for RAID-Z vdevs. |
|
391 |
*/ |
|
392 |
nparity = -1ULL; |
|
393 |
if (ops == &vdev_raidz_ops) { |
|
394 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, |
|
395 |
&nparity) == 0) { |
|
396 |
/* |
|
397 |
* Currently, we can only support 2 parity devices. |
|
398 |
*/ |
|
399 |
if (nparity == 0 || nparity > 2) |
|
400 |
return (EINVAL); |
|
401 |
/* |
|
402 |
* Older versions can only support 1 parity device. |
|
403 |
*/ |
|
404 |
if (nparity == 2 && |
|
4577 | 405 |
spa_version(spa) < SPA_VERSION_RAID6) |
4527 | 406 |
return (ENOTSUP); |
407 |
} else { |
|
408 |
/* |
|
409 |
* We require the parity to be specified for SPAs that |
|
410 |
* support multiple parity levels. |
|
411 |
*/ |
|
4577 | 412 |
if (spa_version(spa) >= SPA_VERSION_RAID6) |
4527 | 413 |
return (EINVAL); |
414 |
/* |
|
415 |
* Otherwise, we default to 1 parity device for RAID-Z. |
|
416 |
*/ |
|
417 |
nparity = 1; |
|
418 |
} |
|
419 |
} else { |
|
420 |
nparity = 0; |
|
421 |
} |
|
422 |
ASSERT(nparity != -1ULL); |
|
423 |
||
789 | 424 |
vd = vdev_alloc_common(spa, id, guid, ops); |
425 |
||
4527 | 426 |
vd->vdev_islog = islog; |
427 |
vd->vdev_nparity = nparity; |
|
428 |
||
789 | 429 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) |
430 |
vd->vdev_path = spa_strdup(vd->vdev_path); |
|
431 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) |
|
432 |
vd->vdev_devid = spa_strdup(vd->vdev_devid); |
|
4451 | 433 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, |
434 |
&vd->vdev_physpath) == 0) |
|
435 |
vd->vdev_physpath = spa_strdup(vd->vdev_physpath); |
|
789 | 436 |
|
437 |
/* |
|
1171 | 438 |
* Set the whole_disk property. If it's not specified, leave the value |
439 |
* as -1. |
|
440 |
*/ |
|
441 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, |
|
442 |
&vd->vdev_wholedisk) != 0) |
|
443 |
vd->vdev_wholedisk = -1ULL; |
|
444 |
||
445 |
/* |
|
1544 | 446 |
* Look for the 'not present' flag. This will only be set if the device |
447 |
* was not present at the time of import. |
|
448 |
*/ |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
449 |
if (!spa->spa_import_faulted) |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
450 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
451 |
&vd->vdev_not_present); |
1544 | 452 |
|
453 |
/* |
|
1732 | 454 |
* Get the alignment requirement. |
455 |
*/ |
|
456 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); |
|
457 |
||
458 |
/* |
|
789 | 459 |
* If we're a top-level vdev, try to load the allocation parameters. |
460 |
*/ |
|
461 |
if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { |
|
462 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, |
|
463 |
&vd->vdev_ms_array); |
|
464 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, |
|
465 |
&vd->vdev_ms_shift); |
|
466 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, |
|
467 |
&vd->vdev_asize); |
|
468 |
} |
|
469 |
||
470 |
/* |
|
4451 | 471 |
* If we're a leaf vdev, try to load the DTL object and other state. |
789 | 472 |
*/ |
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
473 |
if (vd->vdev_ops->vdev_op_leaf && |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
474 |
(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE)) { |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
475 |
if (alloctype == VDEV_ALLOC_LOAD) { |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
476 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
477 |
&vd->vdev_dtl.smo_object); |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
478 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE, |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
479 |
&vd->vdev_unspare); |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
480 |
} |
1732 | 481 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, |
482 |
&vd->vdev_offline); |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
483 |
|
4451 | 484 |
/* |
485 |
* When importing a pool, we want to ignore the persistent fault |
|
486 |
* state, as the diagnosis made on another system may not be |
|
487 |
* valid in the current context. |
|
488 |
*/ |
|
489 |
if (spa->spa_load_state == SPA_LOAD_OPEN) { |
|
490 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, |
|
491 |
&vd->vdev_faulted); |
|
492 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, |
|
493 |
&vd->vdev_degraded); |
|
494 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, |
|
495 |
&vd->vdev_removed); |
|
496 |
} |
|
789 | 497 |
} |
498 |
||
499 |
/* |
|
500 |
* Add ourselves to the parent's list of children. |
|
501 |
*/ |
|
502 |
vdev_add_child(parent, vd); |
|
503 |
||
2082 | 504 |
*vdp = vd; |
505 |
||
506 |
return (0); |
|
789 | 507 |
} |
508 |
||
509 |
void |
|
510 |
vdev_free(vdev_t *vd) |
|
511 |
{ |
|
512 |
int c; |
|
4451 | 513 |
spa_t *spa = vd->vdev_spa; |
789 | 514 |
|
515 |
/* |
|
516 |
* vdev_free() implies closing the vdev first. This is simpler than |
|
517 |
* trying to ensure complicated semantics for all callers. |
|
518 |
*/ |
|
519 |
vdev_close(vd); |
|
520 |
||
4451 | 521 |
|
1732 | 522 |
ASSERT(!list_link_active(&vd->vdev_dirty_node)); |
789 | 523 |
|
524 |
/* |
|
525 |
* Free all children. |
|
526 |
*/ |
|
527 |
for (c = 0; c < vd->vdev_children; c++) |
|
528 |
vdev_free(vd->vdev_child[c]); |
|
529 |
||
530 |
ASSERT(vd->vdev_child == NULL); |
|
531 |
ASSERT(vd->vdev_guid_sum == vd->vdev_guid); |
|
532 |
||
533 |
/* |
|
534 |
* Discard allocation state. |
|
535 |
*/ |
|
536 |
if (vd == vd->vdev_top) |
|
537 |
vdev_metaslab_fini(vd); |
|
538 |
||
539 |
ASSERT3U(vd->vdev_stat.vs_space, ==, 0); |
|
2082 | 540 |
ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0); |
789 | 541 |
ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); |
542 |
||
543 |
/* |
|
544 |
* Remove this vdev from its parent's child list. |
|
545 |
*/ |
|
546 |
vdev_remove_child(vd->vdev_parent, vd); |
|
547 |
||
548 |
ASSERT(vd->vdev_parent == NULL); |
|
549 |
||
4451 | 550 |
/* |
551 |
* Clean up vdev structure. |
|
552 |
*/ |
|
553 |
vdev_queue_fini(vd); |
|
554 |
vdev_cache_fini(vd); |
|
555 |
||
556 |
if (vd->vdev_path) |
|
557 |
spa_strfree(vd->vdev_path); |
|
558 |
if (vd->vdev_devid) |
|
559 |
spa_strfree(vd->vdev_devid); |
|
560 |
if (vd->vdev_physpath) |
|
561 |
spa_strfree(vd->vdev_physpath); |
|
562 |
||
563 |
if (vd->vdev_isspare) |
|
564 |
spa_spare_remove(vd); |
|
5450 | 565 |
if (vd->vdev_isl2cache) |
566 |
spa_l2cache_remove(vd); |
|
4451 | 567 |
|
568 |
txg_list_destroy(&vd->vdev_ms_list); |
|
569 |
txg_list_destroy(&vd->vdev_dtl_list); |
|
570 |
mutex_enter(&vd->vdev_dtl_lock); |
|
571 |
space_map_unload(&vd->vdev_dtl_map); |
|
572 |
space_map_destroy(&vd->vdev_dtl_map); |
|
573 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
574 |
space_map_destroy(&vd->vdev_dtl_scrub); |
|
575 |
mutex_exit(&vd->vdev_dtl_lock); |
|
576 |
mutex_destroy(&vd->vdev_dtl_lock); |
|
577 |
mutex_destroy(&vd->vdev_stat_lock); |
|
578 |
||
579 |
if (vd == spa->spa_root_vdev) |
|
580 |
spa->spa_root_vdev = NULL; |
|
581 |
||
582 |
kmem_free(vd, sizeof (vdev_t)); |
|
789 | 583 |
} |
584 |
||
585 |
/* |
|
586 |
* Transfer top-level vdev state from svd to tvd. |
|
587 |
*/ |
|
588 |
static void |
|
589 |
vdev_top_transfer(vdev_t *svd, vdev_t *tvd) |
|
590 |
{ |
|
591 |
spa_t *spa = svd->vdev_spa; |
|
592 |
metaslab_t *msp; |
|
593 |
vdev_t *vd; |
|
594 |
int t; |
|
595 |
||
596 |
ASSERT(tvd == tvd->vdev_top); |
|
597 |
||
598 |
tvd->vdev_ms_array = svd->vdev_ms_array; |
|
599 |
tvd->vdev_ms_shift = svd->vdev_ms_shift; |
|
600 |
tvd->vdev_ms_count = svd->vdev_ms_count; |
|
601 |
||
602 |
svd->vdev_ms_array = 0; |
|
603 |
svd->vdev_ms_shift = 0; |
|
604 |
svd->vdev_ms_count = 0; |
|
605 |
||
606 |
tvd->vdev_mg = svd->vdev_mg; |
|
607 |
tvd->vdev_ms = svd->vdev_ms; |
|
608 |
||
609 |
svd->vdev_mg = NULL; |
|
610 |
svd->vdev_ms = NULL; |
|
1732 | 611 |
|
612 |
if (tvd->vdev_mg != NULL) |
|
613 |
tvd->vdev_mg->mg_vd = tvd; |
|
789 | 614 |
|
615 |
tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; |
|
616 |
tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; |
|
2082 | 617 |
tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace; |
789 | 618 |
|
619 |
svd->vdev_stat.vs_alloc = 0; |
|
620 |
svd->vdev_stat.vs_space = 0; |
|
2082 | 621 |
svd->vdev_stat.vs_dspace = 0; |
789 | 622 |
|
623 |
for (t = 0; t < TXG_SIZE; t++) { |
|
624 |
while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) |
|
625 |
(void) txg_list_add(&tvd->vdev_ms_list, msp, t); |
|
626 |
while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL) |
|
627 |
(void) txg_list_add(&tvd->vdev_dtl_list, vd, t); |
|
628 |
if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t)) |
|
629 |
(void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t); |
|
630 |
} |
|
631 |
||
1732 | 632 |
if (list_link_active(&svd->vdev_dirty_node)) { |
789 | 633 |
vdev_config_clean(svd); |
634 |
vdev_config_dirty(tvd); |
|
635 |
} |
|
636 |
||
2082 | 637 |
tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio; |
638 |
svd->vdev_deflate_ratio = 0; |
|
4527 | 639 |
|
640 |
tvd->vdev_islog = svd->vdev_islog; |
|
641 |
svd->vdev_islog = 0; |
|
789 | 642 |
} |
643 |
||
644 |
static void |
|
645 |
vdev_top_update(vdev_t *tvd, vdev_t *vd) |
|
646 |
{ |
|
647 |
int c; |
|
648 |
||
649 |
if (vd == NULL) |
|
650 |
return; |
|
651 |
||
652 |
vd->vdev_top = tvd; |
|
653 |
||
654 |
for (c = 0; c < vd->vdev_children; c++) |
|
655 |
vdev_top_update(tvd, vd->vdev_child[c]); |
|
656 |
} |
|
657 |
||
658 |
/*
 * Add a mirror/replacing vdev above an existing vdev.
 *
 * Returns the newly allocated interior vdev (mvd), which takes over
 * cvd's slot under cvd's old parent; cvd becomes mvd's only child.
 */
vdev_t *
vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
{
	spa_t *spa = cvd->vdev_spa;
	vdev_t *pvd = cvd->vdev_parent;
	vdev_t *mvd;

	ASSERT(spa_config_held(spa, RW_WRITER));

	/* The new interior vdev inherits cvd's id so it fills cvd's slot. */
	mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);

	/* Mirror cvd's geometry and current state onto the new parent. */
	mvd->vdev_asize = cvd->vdev_asize;
	mvd->vdev_ashift = cvd->vdev_ashift;
	mvd->vdev_state = cvd->vdev_state;

	/* Splice: pvd -> mvd -> cvd, then refresh vdev_top pointers. */
	vdev_remove_child(pvd, cvd);
	vdev_add_child(pvd, mvd);
	cvd->vdev_id = mvd->vdev_children;
	vdev_add_child(mvd, cvd);
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	/* If mvd is now the top-level vdev, cvd's top-level state moves up. */
	if (mvd == mvd->vdev_top)
		vdev_top_transfer(cvd, mvd);

	return (mvd);
}
|
687 |
||
688 |
/*
 * Remove a 1-way mirror/replacing vdev from the tree.
 *
 * cvd is the sole remaining child; its parent mvd (a mirror, replacing,
 * or spare vdev) is collapsed out of the tree and freed, with cvd taking
 * mvd's slot under mvd's parent.
 */
void
vdev_remove_parent(vdev_t *cvd)
{
	vdev_t *mvd = cvd->vdev_parent;
	vdev_t *pvd = mvd->vdev_parent;

	ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER));

	ASSERT(mvd->vdev_children == 1);
	ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
	    mvd->vdev_ops == &vdev_replacing_ops ||
	    mvd->vdev_ops == &vdev_spare_ops);
	/* cvd inherits the interior vdev's alignment requirement. */
	cvd->vdev_ashift = mvd->vdev_ashift;

	/* Splice mvd out: pvd -> cvd directly, reusing mvd's child slot. */
	vdev_remove_child(mvd, cvd);
	vdev_remove_child(pvd, mvd);
	cvd->vdev_id = mvd->vdev_id;
	vdev_add_child(pvd, cvd);
	/*
	 * If we created a new toplevel vdev, then we need to change the child's
	 * vdev GUID to match the old toplevel vdev.  Otherwise, we could have
	 * detached an offline device, and when we go to import the pool we'll
	 * think we have two toplevel vdevs, instead of a different version of
	 * the same toplevel vdev.
	 */
	if (cvd->vdev_top == cvd) {
		/* Keep the guid sums consistent while swapping the guid. */
		pvd->vdev_guid_sum -= cvd->vdev_guid;
		cvd->vdev_guid_sum -= cvd->vdev_guid;
		cvd->vdev_guid = mvd->vdev_guid;
		cvd->vdev_guid_sum += mvd->vdev_guid;
		pvd->vdev_guid_sum += cvd->vdev_guid;
	}
	vdev_top_update(cvd->vdev_top, cvd->vdev_top);

	/* If cvd is now the top-level vdev, take over mvd's top-level state. */
	if (cvd == cvd->vdev_top)
		vdev_top_transfer(mvd, cvd);

	ASSERT(mvd->vdev_children == 0);
	vdev_free(mvd);
}
|
731 |
||
1544 | 732 |
/*
 * Create (or grow) the metaslab array for a top-level vdev.
 *
 * On pool load (txg == 0) each new metaslab's space map object is read
 * from the MOS; during normal operation new metaslabs start empty.
 * Returns 0 on success or an error from the DMU reads.
 */
int
vdev_metaslab_init(vdev_t *vd, uint64_t txg)
{
	spa_t *spa = vd->vdev_spa;
	objset_t *mos = spa->spa_meta_objset;
	metaslab_class_t *mc;
	uint64_t m;
	uint64_t oldc = vd->vdev_ms_count;
	uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
	metaslab_t **mspp;
	int error;

	if (vd->vdev_ms_shift == 0)	/* not being allocated from yet */
		return (0);

	dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc);

	/* The metaslab count can only grow. */
	ASSERT(oldc <= newc);

	/* Log devices allocate from the separate intent-log class. */
	if (vd->vdev_islog)
		mc = spa->spa_log_class;
	else
		mc = spa->spa_normal_class;

	if (vd->vdev_mg == NULL)
		vd->vdev_mg = metaslab_group_create(mc, vd);

	/* Grow the array, preserving any existing metaslab pointers. */
	mspp = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);

	if (oldc != 0) {
		bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp));
		kmem_free(vd->vdev_ms, oldc * sizeof (*mspp));
	}

	/* Install the new array before the loop so early returns are safe. */
	vd->vdev_ms = mspp;
	vd->vdev_ms_count = newc;

	for (m = oldc; m < newc; m++) {
		space_map_obj_t smo = { 0, 0, 0 };
		if (txg == 0) {
			/*
			 * Loading an existing pool: look up this metaslab's
			 * space map object number in the on-disk array and,
			 * if present, read its bookkeeping from the bonus
			 * buffer.
			 */
			uint64_t object = 0;
			error = dmu_read(mos, vd->vdev_ms_array,
			    m * sizeof (uint64_t), sizeof (uint64_t), &object);
			if (error)
				return (error);
			if (object != 0) {
				dmu_buf_t *db;
				error = dmu_bonus_hold(mos, object, FTAG, &db);
				if (error)
					return (error);
				ASSERT3U(db->db_size, >=, sizeof (smo));
				bcopy(db->db_data, &smo, sizeof (smo));
				ASSERT3U(smo.smo_object, ==, object);
				dmu_buf_rele(db, FTAG);
			}
		}
		vd->vdev_ms[m] = metaslab_init(vd->vdev_mg, &smo,
		    m << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg);
	}

	return (0);
}
794 |
||
795 |
void |
|
796 |
vdev_metaslab_fini(vdev_t *vd) |
|
797 |
{ |
|
798 |
uint64_t m; |
|
799 |
uint64_t count = vd->vdev_ms_count; |
|
800 |
||
801 |
if (vd->vdev_ms != NULL) { |
|
802 |
for (m = 0; m < count; m++) |
|
1732 | 803 |
if (vd->vdev_ms[m] != NULL) |
804 |
metaslab_fini(vd->vdev_ms[m]); |
|
789 | 805 |
kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); |
806 |
vd->vdev_ms = NULL; |
|
807 |
} |
|
808 |
} |
|
809 |
||
5329 | 810 |
int |
811 |
vdev_probe(vdev_t *vd) |
|
812 |
{ |
|
813 |
if (vd == NULL) |
|
814 |
return (EINVAL); |
|
815 |
||
816 |
/* |
|
817 |
* Right now we only support status checks on the leaf vdevs. |
|
818 |
*/ |
|
819 |
if (vd->vdev_ops->vdev_op_leaf) |
|
820 |
return (vd->vdev_ops->vdev_op_probe(vd)); |
|
821 |
||
822 |
return (0); |
|
823 |
} |
|
824 |
||
789 | 825 |
/*
 * Prepare a virtual device for access.
 *
 * Opens the device via its ops vector, validates its size and alignment
 * against previous opens, probes it, and sets the resulting vdev state.
 * Returns 0 on success or an errno (with the vdev state updated) on
 * failure.
 */
int
vdev_open(vdev_t *vd)
{
	int error;
	int c;
	uint64_t osize = 0;
	uint64_t asize, psize;
	uint64_t ashift = 0;

	ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
	    vd->vdev_state == VDEV_STATE_CANT_OPEN ||
	    vd->vdev_state == VDEV_STATE_OFFLINE);

	/* Decay any injected fault countdown; otherwise clear fault mode. */
	if (vd->vdev_fault_mode == VDEV_FAULT_COUNT)
		vd->vdev_fault_arg >>= 1;
	else
		vd->vdev_fault_mode = VDEV_FAULT_NONE;

	vd->vdev_stat.vs_aux = VDEV_AUX_NONE;

	/* Faulted or offline leaves never open. */
	if (!vd->vdev_removed && vd->vdev_faulted) {
		ASSERT(vd->vdev_children == 0);
		vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
		    VDEV_AUX_ERR_EXCEEDED);
		return (ENXIO);
	} else if (vd->vdev_offline) {
		ASSERT(vd->vdev_children == 0);
		vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
		return (ENXIO);
	}

	error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);

	/* Allow the fault-injection framework to force an open failure. */
	if (zio_injection_enabled && error == 0)
		error = zio_handle_device_injection(vd, ENXIO);

	if (error) {
		/*
		 * A failure other than "device missing" clears the
		 * removed flag: the device is present but broken.
		 */
		if (vd->vdev_removed &&
		    vd->vdev_stat.vs_aux != VDEV_AUX_OPEN_FAILED)
			vd->vdev_removed = B_FALSE;

		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    vd->vdev_stat.vs_aux);
		return (error);
	}

	vd->vdev_removed = B_FALSE;

	if (vd->vdev_degraded) {
		ASSERT(vd->vdev_children == 0);
		vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
		    VDEV_AUX_ERR_EXCEEDED);
	} else {
		vd->vdev_state = VDEV_STATE_HEALTHY;
	}

	/* Any unhealthy child degrades an interior vdev. */
	for (c = 0; c < vd->vdev_children; c++)
		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
			    VDEV_AUX_NONE);
			break;
		}

	osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));

	if (vd->vdev_children == 0) {
		/* Leaf vdev: enforce minimum size and reserve label space. */
		if (osize < SPA_MINDEVSIZE) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_TOO_SMALL);
			return (EOVERFLOW);
		}
		psize = osize;
		asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
	} else {
		if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE -
		    (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_TOO_SMALL);
			return (EOVERFLOW);
		}
		psize = 0;
		asize = osize;
	}

	vd->vdev_psize = psize;

	if (vd->vdev_asize == 0) {
		/*
		 * This is the first-ever open, so use the computed values.
		 * For testing purposes, a higher ashift can be requested.
		 */
		vd->vdev_asize = asize;
		vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
	} else {
		/*
		 * Make sure the alignment requirement hasn't increased.
		 */
		if (ashift > vd->vdev_top->vdev_ashift) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_BAD_LABEL);
			return (EINVAL);
		}

		/*
		 * Make sure the device hasn't shrunk.
		 */
		if (asize < vd->vdev_asize) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_BAD_LABEL);
			return (EINVAL);
		}

		/*
		 * If all children are healthy and the asize has increased,
		 * then we've experienced dynamic LUN growth.
		 */
		if (vd->vdev_state == VDEV_STATE_HEALTHY &&
		    asize > vd->vdev_asize) {
			vd->vdev_asize = asize;
		}
	}

	/*
	 * Ensure we can issue some IO before declaring the
	 * vdev open for business.
	 */
	error = vdev_probe(vd);
	if (error) {
		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_OPEN_FAILED);
		return (error);
	}

	/*
	 * If this is a top-level vdev, compute the raidz-deflation
	 * ratio.  Note, we hard-code in 128k (1<<17) because it is the
	 * current "typical" blocksize.  Even if SPA_MAXBLOCKSIZE
	 * changes, this algorithm must never change, or we will
	 * inconsistently account for existing bp's.
	 */
	if (vd->vdev_top == vd) {
		vd->vdev_deflate_ratio = (1<<17) /
		    (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
	}

	return (0);
}
|
975 |
||
976 |
/*
 * Called once the vdevs are all opened, this routine validates the label
 * contents.  This needs to be done before vdev_load() so that we don't
 * inadvertently do repair I/Os to the wrong device.
 *
 * This function will only return failure if one of the vdevs indicates that it
 * has since been destroyed or exported.  This is only possible if
 * /etc/zfs/zpool.cache was readonly at the time.  Otherwise, the vdev state
 * will be updated but the function will return 0.
 */
int
vdev_validate(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;
	int c;
	nvlist_t *label;
	uint64_t guid;
	uint64_t state;

	/* Validate the whole subtree first; any EBADF propagates up. */
	for (c = 0; c < vd->vdev_children; c++)
		if (vdev_validate(vd->vdev_child[c]) != 0)
			return (EBADF);

	/*
	 * If the device has already failed, or was marked offline, don't do
	 * any further validation.  Otherwise, label I/O will fail and we will
	 * overwrite the previous state.
	 */
	if (vd->vdev_ops->vdev_op_leaf && !vdev_is_dead(vd)) {

		if ((label = vdev_label_read_config(vd)) == NULL) {
			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_BAD_LABEL);
			return (0);
		}

		/* The label must name the pool we think we belong to. */
		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
		    &guid) != 0 || guid != spa_guid(spa)) {
			vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		/* ...and carry this vdev's own guid. */
		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
		    &guid) != 0 || guid != vd->vdev_guid) {
			vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		/* ...and record a pool state. */
		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0) {
			vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			nvlist_free(label);
			return (0);
		}

		nvlist_free(label);

		/*
		 * A pool being opened (not imported) whose label says it is
		 * no longer active has been destroyed or exported elsewhere.
		 */
		if (spa->spa_load_state == SPA_LOAD_OPEN &&
		    state != POOL_STATE_ACTIVE)
			return (EBADF);
	}

	/*
	 * If we were able to open and validate a vdev that was previously
	 * marked permanently unavailable, clear that state now.
	 */
	if (vd->vdev_not_present)
		vd->vdev_not_present = 0;

	return (0);
}
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1052 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1053 |
/* |
789 | 1054 |
* Close a virtual device. |
1055 |
*/ |
|
1056 |
void |
|
1057 |
vdev_close(vdev_t *vd) |
|
1058 |
{ |
|
1059 |
vd->vdev_ops->vdev_op_close(vd); |
|
1060 |
||
4451 | 1061 |
vdev_cache_purge(vd); |
789 | 1062 |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1063 |
/* |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1064 |
* We record the previous state before we close it, so that if we are |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1065 |
* doing a reopen(), we don't generate FMA ereports if we notice that |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1066 |
* it's still faulted. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1067 |
*/ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1068 |
vd->vdev_prevstate = vd->vdev_state; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1069 |
|
789 | 1070 |
if (vd->vdev_offline) |
1071 |
vd->vdev_state = VDEV_STATE_OFFLINE; |
|
1072 |
else |
|
1073 |
vd->vdev_state = VDEV_STATE_CLOSED; |
|
1544 | 1074 |
vd->vdev_stat.vs_aux = VDEV_AUX_NONE; |
789 | 1075 |
} |
1076 |
||
1077 |
/*
 * Close and immediately reopen a vdev, then re-validate its identity.
 * Requires the spa config lock as writer.
 */
void
vdev_reopen(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;

	ASSERT(spa_config_held(spa, RW_WRITER));

	vdev_close(vd);
	(void) vdev_open(vd);

	/*
	 * Call vdev_validate() here to make sure we have the same device.
	 * Otherwise, a device with an invalid label could be successfully
	 * opened in response to vdev_reopen().
	 */
	if (vd->vdev_aux) {
		/*
		 * Auxiliary (spare/l2cache) vdevs use aux validation; an
		 * L2ARC device that came back healthy but is no longer
		 * registered with the ARC is re-added here.
		 */
		(void) vdev_validate_aux(vd);
		if (!vdev_is_dead(vd) &&
		    !l2arc_vdev_present(vd)) {
			uint64_t size = vdev_get_rsize(vd);
			l2arc_add_vdev(spa, vd,
			    VDEV_LABEL_START_SIZE,
			    size - VDEV_LABEL_START_SIZE);
		}
	} else {
		(void) vdev_validate(vd);
	}

	/*
	 * Reassess parent vdev's health.
	 */
	vdev_propagate_state(vd);
}
1110 |
||
1111 |
int |
|
2082 | 1112 |
vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing) |
789 | 1113 |
{ |
1114 |
int error; |
|
1115 |
||
1116 |
/* |
|
1117 |
* Normally, partial opens (e.g. of a mirror) are allowed. |
|
1118 |
* For a create, however, we want to fail the request if |
|
1119 |
* there are any components we can't open. |
|
1120 |
*/ |
|
1121 |
error = vdev_open(vd); |
|
1122 |
||
1123 |
if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { |
|
1124 |
vdev_close(vd); |
|
1125 |
return (error ? error : ENXIO); |
|
1126 |
} |
|
1127 |
||
1128 |
/* |
|
1129 |
* Recursively initialize all labels. |
|
1130 |
*/ |
|
3377
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3059
diff
changeset
|
1131 |
if ((error = vdev_label_init(vd, txg, isreplacing ? |
a2fa338530c1
6393525 vdev_reopen() should verify that it's still the same device
eschrock
parents:
3059
diff
changeset
|
1132 |
VDEV_LABEL_REPLACE : VDEV_LABEL_CREATE)) != 0) { |
789 | 1133 |
vdev_close(vd); |
1134 |
return (error); |
|
1135 |
} |
|
1136 |
||
1137 |
return (0); |
|
1138 |
} |
|
1139 |
||
1140 |
/* |
|
1141 |
* The is the latter half of vdev_create(). It is distinct because it |
|
1142 |
* involves initiating transactions in order to do metaslab creation. |
|
1143 |
* For creation, we want to try to create all vdevs at once and then undo it |
|
1144 |
* if anything fails; this is much harder if we have pending transactions. |
|
1145 |
*/ |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1146 |
void |
789 | 1147 |
vdev_init(vdev_t *vd, uint64_t txg) |
1148 |
{ |
|
1149 |
/* |
|
1150 |
* Aim for roughly 200 metaslabs per vdev. |
|
1151 |
*/ |
|
1152 |
vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); |
|
1153 |
vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); |
|
1154 |
||
1155 |
/* |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1156 |
* Initialize the vdev's metaslabs. This can't fail because |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1157 |
* there's nothing to read when creating all new metaslabs. |
789 | 1158 |
*/ |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1159 |
VERIFY(vdev_metaslab_init(vd, txg) == 0); |
789 | 1160 |
} |
1161 |
||
1162 |
void |
|
1732 | 1163 |
vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) |
789 | 1164 |
{ |
1732 | 1165 |
ASSERT(vd == vd->vdev_top); |
1166 |
ASSERT(ISP2(flags)); |
|
789 | 1167 |
|
1732 | 1168 |
if (flags & VDD_METASLAB) |
1169 |
(void) txg_list_add(&vd->vdev_ms_list, arg, txg); |
|
1170 |
||
1171 |
if (flags & VDD_DTL) |
|
1172 |
(void) txg_list_add(&vd->vdev_dtl_list, arg, txg); |
|
1173 |
||
1174 |
(void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg); |
|
789 | 1175 |
} |
1176 |
||
1177 |
void |
|
1178 |
vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size) |
|
1179 |
{ |
|
1180 |
mutex_enter(sm->sm_lock); |
|
1181 |
if (!space_map_contains(sm, txg, size)) |
|
1182 |
space_map_add(sm, txg, size); |
|
1183 |
mutex_exit(sm->sm_lock); |
|
1184 |
} |
|
1185 |
||
1186 |
int |
|
1187 |
vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size) |
|
1188 |
{ |
|
1189 |
int dirty; |
|
1190 |
||
1191 |
/* |
|
1192 |
* Quick test without the lock -- covers the common case that |
|
1193 |
* there are no dirty time segments. |
|
1194 |
*/ |
|
1195 |
if (sm->sm_space == 0) |
|
1196 |
return (0); |
|
1197 |
||
1198 |
mutex_enter(sm->sm_lock); |
|
1199 |
dirty = space_map_contains(sm, txg, size); |
|
1200 |
mutex_exit(sm->sm_lock); |
|
1201 |
||
1202 |
return (dirty); |
|
1203 |
} |
|
1204 |
||
1205 |
/* |
|
1206 |
* Reassess DTLs after a config change or scrub completion. |
|
1207 |
*/ |
|
1208 |
void |
|
1209 |
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) |
|
1210 |
{ |
|
1544 | 1211 |
spa_t *spa = vd->vdev_spa; |
789 | 1212 |
int c; |
1213 |
||
1544 | 1214 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
789 | 1215 |
|
1216 |
if (vd->vdev_children == 0) { |
|
1217 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1218 |
/* |
|
1219 |
* We're successfully scrubbed everything up to scrub_txg. |
|
1220 |
* Therefore, excise all old DTLs up to that point, then |
|
1221 |
* fold in the DTLs for everything we couldn't scrub. |
|
1222 |
*/ |
|
1223 |
if (scrub_txg != 0) { |
|
1224 |
space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg); |
|
1225 |
space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub); |
|
1226 |
} |
|
1227 |
if (scrub_done) |
|
1228 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
1229 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1732 | 1230 |
if (txg != 0) |
1231 |
vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); |
|
789 | 1232 |
return; |
1233 |
} |
|
1234 |
||
1544 | 1235 |
/* |
1236 |
* Make sure the DTLs are always correct under the scrub lock. |
|
1237 |
*/ |
|
1238 |
if (vd == spa->spa_root_vdev) |
|
1239 |
mutex_enter(&spa->spa_scrub_lock); |
|
1240 |
||
789 | 1241 |
mutex_enter(&vd->vdev_dtl_lock); |
1242 |
space_map_vacate(&vd->vdev_dtl_map, NULL, NULL); |
|
1243 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
1244 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1245 |
||
1246 |
for (c = 0; c < vd->vdev_children; c++) { |
|
1247 |
vdev_t *cvd = vd->vdev_child[c]; |
|
1248 |
vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done); |
|
1249 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1250 |
space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map); |
|
1251 |
space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub); |
|
1252 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1253 |
} |
|
1544 | 1254 |
|
1255 |
if (vd == spa->spa_root_vdev) |
|
1256 |
mutex_exit(&spa->spa_scrub_lock); |
|
789 | 1257 |
} |
1258 |
||
1259 |
static int |
|
1260 |
vdev_dtl_load(vdev_t *vd) |
|
1261 |
{ |
|
1262 |
spa_t *spa = vd->vdev_spa; |
|
1263 |
space_map_obj_t *smo = &vd->vdev_dtl; |
|
1732 | 1264 |
objset_t *mos = spa->spa_meta_objset; |
789 | 1265 |
dmu_buf_t *db; |
1266 |
int error; |
|
1267 |
||
1268 |
ASSERT(vd->vdev_children == 0); |
|
1269 |
||
1270 |
if (smo->smo_object == 0) |
|
1271 |
return (0); |
|
1272 |
||
1732 | 1273 |
if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0) |
1544 | 1274 |
return (error); |
1732 | 1275 |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4808
diff
changeset
|
1276 |
ASSERT3U(db->db_size, >=, sizeof (*smo)); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4808
diff
changeset
|
1277 |
bcopy(db->db_data, smo, sizeof (*smo)); |
1544 | 1278 |
dmu_buf_rele(db, FTAG); |
789 | 1279 |
|
1280 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1732 | 1281 |
error = space_map_load(&vd->vdev_dtl_map, NULL, SM_ALLOC, smo, mos); |
789 | 1282 |
mutex_exit(&vd->vdev_dtl_lock); |
1283 |
||
1284 |
return (error); |
|
1285 |
} |
|
1286 |
||
1287 |
void |
|
1288 |
vdev_dtl_sync(vdev_t *vd, uint64_t txg) |
|
1289 |
{ |
|
1290 |
spa_t *spa = vd->vdev_spa; |
|
1291 |
space_map_obj_t *smo = &vd->vdev_dtl; |
|
1292 |
space_map_t *sm = &vd->vdev_dtl_map; |
|
1732 | 1293 |
objset_t *mos = spa->spa_meta_objset; |
789 | 1294 |
space_map_t smsync; |
1295 |
kmutex_t smlock; |
|
1296 |
dmu_buf_t *db; |
|
1297 |
dmu_tx_t *tx; |
|
1298 |
||
1299 |
dprintf("%s in txg %llu pass %d\n", |
|
1300 |
vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); |
|
1301 |
||
1302 |
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); |
|
1303 |
||
1304 |
if (vd->vdev_detached) { |
|
1305 |
if (smo->smo_object != 0) { |
|
1732 | 1306 |
int err = dmu_object_free(mos, smo->smo_object, tx); |
789 | 1307 |
ASSERT3U(err, ==, 0); |
1308 |
smo->smo_object = 0; |
|
1309 |
} |
|
1310 |
dmu_tx_commit(tx); |
|
1732 | 1311 |
dprintf("detach %s committed in txg %llu\n", |
1312 |
vdev_description(vd), txg); |
|
789 | 1313 |
return; |
1314 |
} |
|
1315 |
||
1316 |
if (smo->smo_object == 0) { |
|
1317 |
ASSERT(smo->smo_objsize == 0); |
|
1318 |
ASSERT(smo->smo_alloc == 0); |
|
1732 | 1319 |
smo->smo_object = dmu_object_alloc(mos, |
789 | 1320 |
DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, |
1321 |
DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); |
|
1322 |
ASSERT(smo->smo_object != 0); |
|
1323 |
vdev_config_dirty(vd->vdev_top); |
|
1324 |
} |
|
1325 |
||
1326 |
mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL); |
|
1327 |
||
1328 |
space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift, |
|
1329 |
&smlock); |
|
1330 |
||
1331 |
mutex_enter(&smlock); |
|
1332 |
||
1333 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1732 | 1334 |
space_map_walk(sm, space_map_add, &smsync); |
789 | 1335 |
mutex_exit(&vd->vdev_dtl_lock); |
1336 |
||
1732 | 1337 |
space_map_truncate(smo, mos, tx); |
1338 |
space_map_sync(&smsync, SM_ALLOC, smo, mos, tx); |
|
789 | 1339 |
|
1340 |
space_map_destroy(&smsync); |
|
1341 |
||
1342 |
mutex_exit(&smlock); |
|
1343 |
mutex_destroy(&smlock); |
|
1344 |
||
1732 | 1345 |
VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); |
789 | 1346 |
dmu_buf_will_dirty(db, tx); |
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4808
diff
changeset
|
1347 |
ASSERT3U(db->db_size, >=, sizeof (*smo)); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4808
diff
changeset
|
1348 |
bcopy(smo, db->db_data, sizeof (*smo)); |
1544 | 1349 |
dmu_buf_rele(db, FTAG); |
789 | 1350 |
|
1351 |
dmu_tx_commit(tx); |
|
1352 |
} |
|
1353 |
||
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1354 |
void |
1544 | 1355 |
vdev_load(vdev_t *vd) |
789 | 1356 |
{ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1357 |
int c; |
789 | 1358 |
|
1359 |
/* |
|
1360 |
* Recursively load all children. |
|
1361 |
*/ |
|
1362 |
for (c = 0; c < vd->vdev_children; c++) |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1363 |
vdev_load(vd->vdev_child[c]); |
789 | 1364 |
|
1365 |
/* |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1366 |
* If this is a top-level vdev, initialize its metaslabs. |
789 | 1367 |
*/ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1368 |
if (vd == vd->vdev_top && |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1369 |
(vd->vdev_ashift == 0 || vd->vdev_asize == 0 || |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1370 |
vdev_metaslab_init(vd, 0) != 0)) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1371 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1372 |
VDEV_AUX_CORRUPT_DATA); |
789 | 1373 |
|
1374 |
/* |
|
1375 |
* If this is a leaf vdev, load its DTL. |
|
1376 |
*/ |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1377 |
if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1378 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1379 |
VDEV_AUX_CORRUPT_DATA); |
789 | 1380 |
} |
1381 |
||
2082 | 1382 |
/* |
5450 | 1383 |
* The special vdev case is used for hot spares and l2cache devices. Its |
1384 |
* sole purpose it to set the vdev state for the associated vdev. To do this, |
|
1385 |
* we make sure that we can open the underlying device, then try to read the |
|
1386 |
* label, and make sure that the label is sane and that it hasn't been |
|
1387 |
* repurposed to another pool. |
|
2082 | 1388 |
*/ |
1389 |
int |
|
5450 | 1390 |
vdev_validate_aux(vdev_t *vd) |
2082 | 1391 |
{ |
1392 |
nvlist_t *label; |
|
1393 |
uint64_t guid, version; |
|
1394 |
uint64_t state; |
|
1395 |
||
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1396 |
if (vdev_is_dead(vd)) |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1397 |
return (0); |
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1398 |
|
2082 | 1399 |
if ((label = vdev_label_read_config(vd)) == NULL) { |
1400 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
|
1401 |
VDEV_AUX_CORRUPT_DATA); |
|
1402 |
return (-1); |
|
1403 |
} |
|
1404 |
||
1405 |
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || |
|
4577 | 1406 |
version > SPA_VERSION || |
2082 | 1407 |
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || |
1408 |
guid != vd->vdev_guid || |
|
1409 |
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { |
|
1410 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
|
1411 |
VDEV_AUX_CORRUPT_DATA); |
|
1412 |
nvlist_free(label); |
|
1413 |
return (-1); |
|
1414 |
} |
|
1415 |
||
1416 |
/* |
|
1417 |
* We don't actually check the pool state here. If it's in fact in |
|
1418 |
* use by another pool, we update this fact on the fly when requested. |
|
1419 |
*/ |
|
1420 |
nvlist_free(label); |
|
1421 |
return (0); |
|
1422 |
} |
|
1423 |
||
789 | 1424 |
void |
1425 |
vdev_sync_done(vdev_t *vd, uint64_t txg) |
|
1426 |
{ |
|
1427 |
metaslab_t *msp; |
|
1428 |
||
1429 |
dprintf("%s txg %llu\n", vdev_description(vd), txg); |
|
1430 |
||
1431 |
while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) |
|
1432 |
metaslab_sync_done(msp, txg); |
|
1433 |
} |
|
1434 |
||
1435 |
void |
|
1436 |
vdev_sync(vdev_t *vd, uint64_t txg) |
|
1437 |
{ |
|
1438 |
spa_t *spa = vd->vdev_spa; |
|
1439 |
vdev_t *lvd; |
|
1440 |
metaslab_t *msp; |
|
1732 | 1441 |
dmu_tx_t *tx; |
789 | 1442 |
|
1443 |
dprintf("%s txg %llu pass %d\n", |
|
1444 |
vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); |
|
1445 |
||
1732 | 1446 |
if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) { |
1447 |
ASSERT(vd == vd->vdev_top); |
|
1448 |
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); |
|
1449 |
vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, |
|
1450 |
DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); |
|
1451 |
ASSERT(vd->vdev_ms_array != 0); |
|
1452 |
vdev_config_dirty(vd); |
|
1453 |
dmu_tx_commit(tx); |
|
1454 |
} |
|
789 | 1455 |
|
1732 | 1456 |
while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) { |
789 | 1457 |
metaslab_sync(msp, txg); |
1732 | 1458 |
(void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg)); |
1459 |
} |
|
789 | 1460 |
|
1461 |
while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) |
|
1462 |
vdev_dtl_sync(lvd, txg); |
|
1463 |
||
1464 |
(void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); |
|
1465 |
} |
|
1466 |
||
1467 |
uint64_t |
|
1468 |
vdev_psize_to_asize(vdev_t *vd, uint64_t psize) |
|
1469 |
{ |
|
1470 |
return (vd->vdev_ops->vdev_op_asize(vd, psize)); |
|
1471 |
} |
|
1472 |
||
1473 |
const char * |
|
1474 |
vdev_description(vdev_t *vd) |
|
1475 |
{ |
|
1476 |
if (vd == NULL || vd->vdev_ops == NULL) |
|
1477 |
return ("<unknown>"); |
|
1478 |
||
1479 |
if (vd->vdev_path != NULL) |
|
1480 |
return (vd->vdev_path); |
|
1481 |
||
1482 |
if (vd->vdev_parent == NULL) |
|
1483 |
return (spa_name(vd->vdev_spa)); |
|
1484 |
||
1485 |
return (vd->vdev_ops->vdev_op_type); |
|
1486 |
} |
|
1487 |
||
4451 | 1488 |
/* |
1489 |
* Mark the given vdev faulted. A faulted vdev behaves as if the device could |
|
1490 |
* not be opened, and no I/O is attempted. |
|
1491 |
*/ |
|
789 | 1492 |
int |
4451 | 1493 |
vdev_fault(spa_t *spa, uint64_t guid) |
1494 |
{ |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1495 |
vdev_t *vd; |
4451 | 1496 |
uint64_t txg; |
1497 |
||
5329 | 1498 |
/* |
1499 |
* Disregard a vdev fault request if the pool has |
|
1500 |
* experienced a complete failure. |
|
1501 |
* |
|
1502 |
* XXX - We do this here so that we don't hold the |
|
1503 |
* spa_namespace_lock in the event that we can't get |
|
1504 |
* the RW_WRITER spa_config_lock. |
|
1505 |
*/ |
|
1506 |
if (spa_state(spa) == POOL_STATE_IO_FAILURE) |
|
1507 |
return (EIO); |
|
1508 |
||
4451 | 1509 |
txg = spa_vdev_enter(spa); |
1510 |
||
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1511 |
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) |
4451 | 1512 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
1513 |
if (!vd->vdev_ops->vdev_op_leaf) |
|
1514 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
|
1515 |
||
1516 |
/* |
|
1517 |
* Faulted state takes precedence over degraded. |
|
1518 |
*/ |
|
1519 |
vd->vdev_faulted = 1ULL; |
|
1520 |
vd->vdev_degraded = 0ULL; |
|
1521 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, |
|
1522 |
VDEV_AUX_ERR_EXCEEDED); |
|
1523 |
||
1524 |
/* |
|
1525 |
* If marking the vdev as faulted cause the toplevel vdev to become |
|
1526 |
* unavailable, then back off and simply mark the vdev as degraded |
|
1527 |
* instead. |
|
1528 |
*/ |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1529 |
if (vdev_is_dead(vd->vdev_top) && vd->vdev_aux == NULL) { |
4451 | 1530 |
vd->vdev_degraded = 1ULL; |
1531 |
vd->vdev_faulted = 0ULL; |
|
1532 |
||
1533 |
/* |
|
1534 |
* If we reopen the device and it's not dead, only then do we |
|
1535 |
* mark it degraded. |
|
1536 |
*/ |
|
1537 |
vdev_reopen(vd); |
|
1538 |
||
5329 | 1539 |
if (vdev_readable(vd)) { |
4451 | 1540 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, |
1541 |
VDEV_AUX_ERR_EXCEEDED); |
|
1542 |
} |
|
1543 |
} |
|
1544 |
||
1545 |
vdev_config_dirty(vd->vdev_top); |
|
1546 |
||
1547 |
(void) spa_vdev_exit(spa, NULL, txg, 0); |
|
1548 |
||
1549 |
return (0); |
|
1550 |
} |
|
1551 |
||
1552 |
/* |
|
1553 |
* Mark the given vdev degraded. A degraded vdev is purely an indication to the |
|
1554 |
* user that something is wrong. The vdev continues to operate as normal as far |
|
1555 |
* as I/O is concerned. |
|
1556 |
*/ |
|
1557 |
int |
|
1558 |
vdev_degrade(spa_t *spa, uint64_t guid) |
|
1559 |
{ |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1560 |
vdev_t *vd; |
4451 | 1561 |
uint64_t txg; |
1562 |
||
5329 | 1563 |
/* |
1564 |
* Disregard a vdev fault request if the pool has |
|
1565 |
* experienced a complete failure. |
|
1566 |
* |
|
1567 |
* XXX - We do this here so that we don't hold the |
|
1568 |
* spa_namespace_lock in the event that we can't get |
|
1569 |
* the RW_WRITER spa_config_lock. |
|
1570 |
*/ |
|
1571 |
if (spa_state(spa) == POOL_STATE_IO_FAILURE) |
|
1572 |
return (EIO); |
|
1573 |
||
4451 | 1574 |
txg = spa_vdev_enter(spa); |
1575 |
||
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1576 |
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) |
4451 | 1577 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
1578 |
if (!vd->vdev_ops->vdev_op_leaf) |
|
1579 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
|
1580 |
||
1581 |
/* |
|
1582 |
* If the vdev is already faulted, then don't do anything. |
|
1583 |
*/ |
|
1584 |
if (vd->vdev_faulted || vd->vdev_degraded) { |
|
1585 |
(void) spa_vdev_exit(spa, NULL, txg, 0); |
|
1586 |
return (0); |
|
1587 |
} |
|
1588 |
||
1589 |
vd->vdev_degraded = 1ULL; |
|
1590 |
if (!vdev_is_dead(vd)) |
|
1591 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, |
|
1592 |
VDEV_AUX_ERR_EXCEEDED); |
|
1593 |
vdev_config_dirty(vd->vdev_top); |
|
1594 |
||
1595 |
(void) spa_vdev_exit(spa, NULL, txg, 0); |
|
1596 |
||
1597 |
return (0); |
|
1598 |
} |
|
1599 |
||
1600 |
/* |
|
1601 |
* Online the given vdev. If 'unspare' is set, it implies two things. First, |
|
1602 |
* any attached spare device should be detached when the device finishes |
|
1603 |
* resilvering. Second, the online should be treated like a 'test' online case, |
|
1604 |
* so no FMA events are generated if the device fails to open. |
|
1605 |
*/ |
|
1606 |
int |
|
1607 |
vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, |
|
1608 |
vdev_state_t *newstate) |
|
789 | 1609 |
{ |
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1610 |
vdev_t *vd; |
1485 | 1611 |
uint64_t txg; |
789 | 1612 |
|
5329 | 1613 |
/* |
1614 |
* Disregard a vdev fault request if the pool has |
|
1615 |
* experienced a complete failure. |
|
1616 |
* |
|
1617 |
* XXX - We do this here so that we don't hold the |
|
1618 |
* spa_namespace_lock in the event that we can't get |
|
1619 |
* the RW_WRITER spa_config_lock. |
|
1620 |
*/ |
|
1621 |
if (spa_state(spa) == POOL_STATE_IO_FAILURE) |
|
1622 |
return (EIO); |
|
1623 |
||
1485 | 1624 |
txg = spa_vdev_enter(spa); |
1625 |
||
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1626 |
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) |
1485 | 1627 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
789 | 1628 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1629 |
if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1630 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1631 |
|
789 | 1632 |
vd->vdev_offline = B_FALSE; |
1485 | 1633 |
vd->vdev_tmpoffline = B_FALSE; |
4451 | 1634 |
vd->vdev_checkremove = (flags & ZFS_ONLINE_CHECKREMOVE) ? |
1635 |
B_TRUE : B_FALSE; |
|
1636 |
vd->vdev_forcefault = (flags & ZFS_ONLINE_FORCEFAULT) ? |
|
1637 |
B_TRUE : B_FALSE; |
|
1544 | 1638 |
vdev_reopen(vd->vdev_top); |
4451 | 1639 |
vd->vdev_checkremove = vd->vdev_forcefault = B_FALSE; |
1640 |
||
1641 |
if (newstate) |
|
1642 |
*newstate = vd->vdev_state; |
|
1643 |
if ((flags & ZFS_ONLINE_UNSPARE) && |
|
1644 |
!vdev_is_dead(vd) && vd->vdev_parent && |
|
1645 |
vd->vdev_parent->vdev_ops == &vdev_spare_ops && |
|
1646 |
vd->vdev_parent->vdev_child[0] == vd) |
|
1647 |
vd->vdev_unspare = B_TRUE; |
|
789 | 1648 |
|
1485 | 1649 |
vdev_config_dirty(vd->vdev_top); |
1650 |
||
1651 |
(void) spa_vdev_exit(spa, NULL, txg, 0); |
|
789 | 1652 |
|
4451 | 1653 |
/* |
1654 |
* Must hold spa_namespace_lock in order to post resilver sysevent |
|
1655 |
* w/pool name. |
|
1656 |
*/ |
|
1657 |
mutex_enter(&spa_namespace_lock); |
|
789 | 1658 |
VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
4451 | 1659 |
mutex_exit(&spa_namespace_lock); |
789 | 1660 |
|
1661 |
return (0); |
|
1662 |
} |
|
1663 |
||
1664 |
int |
|
4451 | 1665 |
vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags) |
789 | 1666 |
{ |
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1667 |
vdev_t *vd; |
1485 | 1668 |
uint64_t txg; |
789 | 1669 |
|
5329 | 1670 |
/* |
1671 |
* Disregard a vdev fault request if the pool has |
|
1672 |
* experienced a complete failure. |
|
1673 |
* |
|
1674 |
* XXX - We do this here so that we don't hold the |
|
1675 |
* spa_namespace_lock in the event that we can't get |
|
1676 |
* the RW_WRITER spa_config_lock. |
|
1677 |
*/ |
|
1678 |
if (spa_state(spa) == POOL_STATE_IO_FAILURE) |
|
1679 |
return (EIO); |
|
1680 |
||
1485 | 1681 |
txg = spa_vdev_enter(spa); |
789 | 1682 |
|
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1683 |
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) |
1485 | 1684 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
789 | 1685 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1686 |
if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1687 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1688 |
|
789 | 1689 |
/* |
1732 | 1690 |
* If the device isn't already offline, try to offline it. |
789 | 1691 |
*/ |
1732 | 1692 |
if (!vd->vdev_offline) { |
1693 |
/* |
|
1694 |
* If this device's top-level vdev has a non-empty DTL, |
|
1695 |
* don't allow the device to be offlined. |
|
1696 |
* |
|
1697 |
* XXX -- make this more precise by allowing the offline |
|
1698 |
* as long as the remaining devices don't have any DTL holes. |
|
1699 |
*/ |
|
1700 |
if (vd->vdev_top->vdev_dtl_map.sm_space != 0) |
|
1701 |
return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
|
789 | 1702 |
|
1732 | 1703 |
/* |
1704 |
* Offline this device and reopen its top-level vdev. |
|
1705 |
* If this action results in the top-level vdev becoming |
|
1706 |
* unusable, undo it and fail the request. |
|
1707 |
*/ |
|
1708 |
vd->vdev_offline = B_TRUE; |
|
1544 | 1709 |
vdev_reopen(vd->vdev_top); |
6643
3a34b0dbb107
6625086 changing cachefile doesn't remove old cache on last user
eschrock
parents:
6523
diff
changeset
|
1710 |
if (vdev_is_dead(vd->vdev_top) && vd->vdev_aux == NULL) { |
1732 | 1711 |
vd->vdev_offline = B_FALSE; |
1712 |
vdev_reopen(vd->vdev_top); |
|
1713 |
return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
|
1714 |
} |
|
789 | 1715 |
} |
1716 |
||
4451 | 1717 |
vd->vdev_tmpoffline = (flags & ZFS_OFFLINE_TEMPORARY) ? |
1718 |
B_TRUE : B_FALSE; |
|
1732 | 1719 |
|
1720 |
vdev_config_dirty(vd->vdev_top); |
|
1485 | 1721 |
|
1722 |
return (spa_vdev_exit(spa, NULL, txg, 0)); |
|
789 | 1723 |
} |
1724 |
||
1544 | 1725 |
/* |
1726 |
* Clear the error counts associated with this vdev. Unlike vdev_online() and |
|
1727 |
* vdev_offline(), we assume the spa config is locked. We also clear all |
|
1728 |
* children. If 'vd' is NULL, then the user wants to clear all vdevs. |
|
5329 | 1729 |
* If reopen is specified then attempt to reopen the vdev if the vdev is |
1730 |
* faulted or degraded. |
|
1544 | 1731 |
*/ |
1732 |
void |
|
5329 | 1733 |
vdev_clear(spa_t *spa, vdev_t *vd, boolean_t reopen_wanted) |
789 | 1734 |
{ |
1544 | 1735 |
int c; |
789 | 1736 |
|
1544 | 1737 |
if (vd == NULL) |
1738 |
vd = spa->spa_root_vdev; |
|
789 | 1739 |
|
1544 | 1740 |
vd->vdev_stat.vs_read_errors = 0; |
1741 |
vd->vdev_stat.vs_write_errors = 0; |
|
1742 |
vd->vdev_stat.vs_checksum_errors = 0; |
|
5329 | 1743 |
vd->vdev_is_failing = B_FALSE; |
789 | 1744 |
|
1544 | 1745 |
for (c = 0; c < vd->vdev_children; c++) |
5329 | 1746 |
vdev_clear(spa, vd->vdev_child[c], reopen_wanted); |
4451 | 1747 |
|
1748 |
/* |
|
6959 | 1749 |
* If we're in the FAULTED state or have experienced failed I/O, then |
1750 |
* clear the persistent state and attempt to reopen the device. We |
|
1751 |
* also mark the vdev config dirty, so that the new faulted state is |
|
1752 |
* written out to disk. |
|
4451 | 1753 |
*/ |
6959 | 1754 |
if (reopen_wanted && (vd->vdev_faulted || vd->vdev_degraded || |
1755 |
vd->vdev_stat.vs_aux == VDEV_AUX_IO_FAILURE)) { |
|
1756 |
boolean_t resilver = (vd->vdev_faulted || vd->vdev_degraded); |
|
1757 |
||
4451 | 1758 |
vd->vdev_faulted = vd->vdev_degraded = 0; |
1759 |
vdev_reopen(vd); |
|
1760 |
vdev_config_dirty(vd->vdev_top); |
|
1761 |
||
6959 | 1762 |
if (resilver && vd->vdev_aux == NULL && !vdev_is_dead(vd)) |
4808 | 1763 |
spa_async_request(spa, SPA_ASYNC_RESILVER); |
4451 | 1764 |
|
1765 |
spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR); |
|
1766 |
} |
|
789 | 1767 |
} |
1768 |
||
1769 |
int |
|
5329 | 1770 |
vdev_readable(vdev_t *vd) |
1771 |
{ |
|
1772 |
/* XXPOLICY */ |
|
1773 |
return (!vdev_is_dead(vd)); |
|
1774 |
} |
|
1775 |
||
1776 |
int |
|
1777 |
vdev_writeable(vdev_t *vd) |
|
1778 |
{ |
|
5369
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
1779 |
return (!vdev_is_dead(vd) && !vd->vdev_is_failing); |
5329 | 1780 |
} |
1781 |
||
1782 |
int |
|
789 | 1783 |
vdev_is_dead(vdev_t *vd) |
1784 |
{ |
|
6523
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1785 |
/* |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1786 |
* If the vdev experienced I/O failures, then the vdev is marked |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1787 |
* as faulted (VDEV_STATE_FAULTED) for status output and FMA; however, |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1788 |
* we need to allow access to the vdev for resumed I/Os (see |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1789 |
* zio_vdev_resume_io() ). |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1790 |
*/ |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1791 |
return (vd->vdev_state < VDEV_STATE_DEGRADED && |
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
1792 |
vd->vdev_stat.vs_aux != VDEV_AUX_IO_FAILURE); |
789 | 1793 |
} |
1794 |
||
1795 |
int |
|
1796 |
vdev_error_inject(vdev_t *vd, zio_t *zio) |
|
1797 |
{ |
|
1798 |
int error = 0; |
|
1799 |
||
1800 |
if (vd->vdev_fault_mode == VDEV_FAULT_NONE) |
|
1801 |
return (0); |
|
1802 |
||
1803 |
if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0) |
|
1804 |
return (0); |
|
1805 |
||
1806 |
switch (vd->vdev_fault_mode) { |
|
1807 |
case VDEV_FAULT_RANDOM: |
|
1808 |
if (spa_get_random(vd->vdev_fault_arg) == 0) |
|
1809 |
error = EIO; |
|
1810 |
break; |
|
1811 |
||
1812 |
case VDEV_FAULT_COUNT: |
|
1813 |
if ((int64_t)--vd->vdev_fault_arg <= 0) |
|
1814 |
vd->vdev_fault_mode = VDEV_FAULT_NONE; |
|
1815 |
error = EIO; |
|
1816 |
break; |
|
1817 |
} |
|
1818 |
||
1819 |
return (error); |
|
1820 |
} |
|
1821 |
||
1822 |
/* |
|
1823 |
* Get statistics for the given vdev. |
|
1824 |
*/ |
|
1825 |
void |
|
1826 |
vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) |
|
1827 |
{ |
|
1828 |
vdev_t *rvd = vd->vdev_spa->spa_root_vdev; |
|
1829 |
int c, t; |
|
1830 |
||
1831 |
mutex_enter(&vd->vdev_stat_lock); |
|
1832 |
bcopy(&vd->vdev_stat, vs, sizeof (*vs)); |
|
1833 |
vs->vs_timestamp = gethrtime() - vs->vs_timestamp; |
|
1834 |
vs->vs_state = vd->vdev_state; |
|
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
1835 |
vs->vs_rsize = vdev_get_rsize(vd); |
789 | 1836 |
mutex_exit(&vd->vdev_stat_lock); |
1837 |
||
1838 |
/* |
|
1839 |
* If we're getting stats on the root vdev, aggregate the I/O counts |
|
1840 |
* over all top-level vdevs (i.e. the direct children of the root). |
|
1841 |
*/ |
|
1842 |
if (vd == rvd) { |
|
1843 |
for (c = 0; c < rvd->vdev_children; c++) { |
|
1844 |
vdev_t *cvd = rvd->vdev_child[c]; |
|
1845 |
vdev_stat_t *cvs = &cvd->vdev_stat; |
|
1846 |
||
1847 |
mutex_enter(&vd->vdev_stat_lock); |
|
1848 |
for (t = 0; t < ZIO_TYPES; t++) { |
|
1849 |
vs->vs_ops[t] += cvs->vs_ops[t]; |
|
1850 |
vs->vs_bytes[t] += cvs->vs_bytes[t]; |
|
1851 |
} |
|
1852 |
vs->vs_read_errors += cvs->vs_read_errors; |
|
1853 |
vs->vs_write_errors += cvs->vs_write_errors; |
|
1854 |
vs->vs_checksum_errors += cvs->vs_checksum_errors; |
|
1855 |
vs->vs_scrub_examined += cvs->vs_scrub_examined; |
|
1856 |
vs->vs_scrub_errors += cvs->vs_scrub_errors; |
|
1857 |
mutex_exit(&vd->vdev_stat_lock); |
|
1858 |
} |
|
1859 |
} |
|
1860 |
} |
|
1861 |
||
1862 |
void |
|
5450 | 1863 |
vdev_clear_stats(vdev_t *vd) |
1864 |
{ |
|
1865 |
mutex_enter(&vd->vdev_stat_lock); |
|
1866 |
vd->vdev_stat.vs_space = 0; |
|
1867 |
vd->vdev_stat.vs_dspace = 0; |
|
1868 |
vd->vdev_stat.vs_alloc = 0; |
|
1869 |
mutex_exit(&vd->vdev_stat_lock); |
|
1870 |
} |
|
1871 |
||
1872 |
void |
|
789 | 1873 |
vdev_stat_update(zio_t *zio) |
1874 |
{ |
|
1875 |
vdev_t *vd = zio->io_vd; |
|
1876 |
vdev_t *pvd; |
|
1877 |
uint64_t txg = zio->io_txg; |
|
1878 |
vdev_stat_t *vs = &vd->vdev_stat; |
|
1879 |
zio_type_t type = zio->io_type; |
|
1880 |
int flags = zio->io_flags; |
|
1881 |
||
1882 |
if (zio->io_error == 0) { |
|
1883 |
if (!(flags & ZIO_FLAG_IO_BYPASS)) { |
|
1884 |
mutex_enter(&vd->vdev_stat_lock); |
|
1885 |
vs->vs_ops[type]++; |
|
1886 |
vs->vs_bytes[type] += zio->io_size; |
|
1887 |
mutex_exit(&vd->vdev_stat_lock); |
|
1888 |
} |
|
1889 |
if ((flags & ZIO_FLAG_IO_REPAIR) && |
|
1890 |
zio->io_delegate_list == NULL) { |
|
1891 |
mutex_enter(&vd->vdev_stat_lock); |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1892 |
if (flags & ZIO_FLAG_SCRUB_THREAD) |
789 | 1893 |
vs->vs_scrub_repaired += zio->io_size; |
1894 |
else |
|
1895 |
vs->vs_self_healed += zio->io_size; |
|
1896 |
mutex_exit(&vd->vdev_stat_lock); |
|
1897 |
} |
|
1898 |
return; |
|
1899 |
} |
|
1900 |
||
1901 |
if (flags & ZIO_FLAG_SPECULATIVE) |
|
1902 |
return; |
|
1903 |
||
5329 | 1904 |
if (vdev_readable(vd)) { |
789 | 1905 |
mutex_enter(&vd->vdev_stat_lock); |
1906 |
if (type == ZIO_TYPE_READ) { |
|
1907 |
if (zio->io_error == ECKSUM) |
|
1908 |
vs->vs_checksum_errors++; |
|
1909 |
else |
|
1910 |
vs->vs_read_errors++; |
|
1911 |
} |
|
1912 |
if (type == ZIO_TYPE_WRITE) |
|
1913 |
vs->vs_write_errors++; |
|
1914 |
mutex_exit(&vd->vdev_stat_lock); |
|
1915 |
} |
|
1916 |
||
1917 |
if (type == ZIO_TYPE_WRITE) { |
|
1918 |
if (txg == 0 || vd->vdev_children != 0) |
|
1919 |
return; |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1920 |
if (flags & ZIO_FLAG_SCRUB_THREAD) { |
789 | 1921 |
ASSERT(flags & ZIO_FLAG_IO_REPAIR); |
1922 |
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) |
|
1923 |
vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1); |
|
1924 |
} |
|
1925 |
if (!(flags & ZIO_FLAG_IO_REPAIR)) { |
|
1926 |
if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1)) |
|
1927 |
return; |
|
1732 | 1928 |
vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); |
789 | 1929 |
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) |
1930 |
vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1); |
|
1931 |
} |
|
1932 |
} |
|
1933 |
} |
|
1934 |
||
1935 |
void |
|
1936 |
vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete) |
|
1937 |
{ |
|
1938 |
int c; |
|
1939 |
vdev_stat_t *vs = &vd->vdev_stat; |
|
1940 |
||
1941 |
for (c = 0; c < vd->vdev_children; c++) |
|
1942 |
vdev_scrub_stat_update(vd->vdev_child[c], type, complete); |
|
1943 |
||
1944 |
mutex_enter(&vd->vdev_stat_lock); |
|
1945 |
||
1946 |
if (type == POOL_SCRUB_NONE) { |
|
1947 |
/* |
|
1948 |
* Update completion and end time. Leave everything else alone |
|
1949 |
* so we can report what happened during the previous scrub. |
|
1950 |
*/ |
|
1951 |
vs->vs_scrub_complete = complete; |
|
1952 |
vs->vs_scrub_end = gethrestime_sec(); |
|
1953 |
} else { |
|
1954 |
vs->vs_scrub_type = type; |
|
1955 |
vs->vs_scrub_complete = 0; |
|
1956 |
vs->vs_scrub_examined = 0; |
|
1957 |
vs->vs_scrub_repaired = 0; |
|
1958 |
vs->vs_scrub_errors = 0; |
|
1959 |
vs->vs_scrub_start = gethrestime_sec(); |
|
1960 |
vs->vs_scrub_end = 0; |
|
1961 |
} |
|
1962 |
||
1963 |
mutex_exit(&vd->vdev_stat_lock); |
|
1964 |
} |
|
1965 |
||
/*
 * Update the in-core space usage stats for this vdev and the root vdev.
 *
 * space_delta/alloc_delta are signed byte deltas applied to vs_space and
 * vs_alloc; dspace_delta is derived from space_delta below.  When
 * update_root is set, the same deltas are also applied to the root vdev's
 * stats (unless this vdev belongs to a non-normal metaslab class).
 */
void
vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta,
    boolean_t update_root)
{
	int64_t dspace_delta = space_delta;
	spa_t *spa = vd->vdev_spa;
	vdev_t *rvd = spa->spa_root_vdev;

	/* Space accounting is done only at top-level vdevs. */
	ASSERT(vd == vd->vdev_top);

	/*
	 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion
	 * factor.  We must calculate this here and not at the root vdev
	 * because the root vdev's psize-to-asize is simply the max of its
	 * childrens', thus not accurate enough for us.
	 */
	ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
	dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
	    vd->vdev_deflate_ratio;

	mutex_enter(&vd->vdev_stat_lock);
	vd->vdev_stat.vs_space += space_delta;
	vd->vdev_stat.vs_alloc += alloc_delta;
	vd->vdev_stat.vs_dspace += dspace_delta;
	mutex_exit(&vd->vdev_stat_lock);

	if (update_root) {
		ASSERT(rvd == vd->vdev_parent);
		ASSERT(vd->vdev_ms_count != 0);

		/*
		 * Don't count non-normal (e.g. intent log) space as part of
		 * the pool's capacity.
		 */
		if (vd->vdev_mg->mg_class != spa->spa_normal_class)
			return;

		/*
		 * The root vdev has its own stat lock; this vdev's stats
		 * were already committed above.
		 */
		mutex_enter(&rvd->vdev_stat_lock);
		rvd->vdev_stat.vs_space += space_delta;
		rvd->vdev_stat.vs_alloc += alloc_delta;
		rvd->vdev_stat.vs_dspace += dspace_delta;
		mutex_exit(&rvd->vdev_stat_lock);
	}
}
2013 |
||
/*
 * Mark a top-level vdev's config as dirty, placing it on the dirty list
 * so that it will be written out next time the vdev configuration is synced.
 * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs.
 */
void
vdev_config_dirty(vdev_t *vd)
{
	spa_t *spa = vd->vdev_spa;
	vdev_t *rvd = spa->spa_root_vdev;
	int c;

	/*
	 * If this is an aux vdev (as with l2cache devices), then we update the
	 * vdev config manually and set the sync flag.
	 */
	if (vd->vdev_aux != NULL) {
		spa_aux_vdev_t *sav = vd->vdev_aux;
		nvlist_t **aux;
		uint_t naux;

		/* Locate this vdev's slot in the aux array. */
		for (c = 0; c < sav->sav_count; c++) {
			if (sav->sav_vdevs[c] == vd)
				break;
		}

		ASSERT(c < sav->sav_count);
		sav->sav_sync = B_TRUE;

		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &aux, &naux) == 0);

		ASSERT(c < naux);

		/*
		 * Setting the nvlist in the middle of the array is a little
		 * sketchy, but it will work.
		 */
		nvlist_free(aux[c]);
		aux[c] = vdev_config_generate(spa, vd, B_TRUE, B_FALSE, B_TRUE);

		return;
	}

	/*
	 * The dirty list is protected by the config lock.  The caller must
	 * either hold the config lock as writer, or must be the sync thread
	 * (which holds the lock as reader).  There's only one sync thread,
	 * so this is sufficient to ensure mutual exclusion.
	 */
	ASSERT(spa_config_held(spa, RW_WRITER) ||
	    dsl_pool_sync_context(spa_get_dsl(spa)));

	if (vd == rvd) {
		/* Root vdev: recurse to dirty every top-level vdev. */
		for (c = 0; c < rvd->vdev_children; c++)
			vdev_config_dirty(rvd->vdev_child[c]);
	} else {
		ASSERT(vd == vd->vdev_top);

		/* Only insert once; list_link_active detects membership. */
		if (!list_link_active(&vd->vdev_dirty_node))
			list_insert_head(&spa->spa_dirty_list, vd);
	}
}
|
2077 |
||
2078 |
void |
|
2079 |
vdev_config_clean(vdev_t *vd) |
|
2080 |
{ |
|
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2081 |
spa_t *spa = vd->vdev_spa; |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2082 |
|
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2083 |
ASSERT(spa_config_held(spa, RW_WRITER) || |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2084 |
dsl_pool_sync_context(spa_get_dsl(spa))); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2085 |
|
1732 | 2086 |
ASSERT(list_link_active(&vd->vdev_dirty_node)); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
2087 |
list_remove(&spa->spa_dirty_list, vd); |
789 | 2088 |
} |
2089 |
||
6523
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
5530
diff
changeset
|
/*
 * Propagate vdev state up from children to parent.
 *
 * Counts each child as faulted (dead/unreadable, or suffering I/O failure)
 * or degraded, lets the vdev's own ops decide the resulting state via
 * vdev_op_state_change(), then recurses toward the root.
 */
void
vdev_propagate_state(vdev_t *vd)
{
	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
	int degraded = 0, faulted = 0;
	int corrupted = 0;
	int c;
	vdev_t *child;

	if (vd->vdev_children > 0) {
		for (c = 0; c < vd->vdev_children; c++) {
			child = vd->vdev_child[c];
			/*
			 * A child counts as faulted if it is dead and
			 * unreadable, or if it has seen an I/O failure;
			 * otherwise any non-healthy state counts as degraded.
			 */
			if (vdev_is_dead(child) && !vdev_readable(child))
				faulted++;
			else if (child->vdev_stat.vs_aux == VDEV_AUX_IO_FAILURE)
				faulted++;
			else if (child->vdev_state <= VDEV_STATE_DEGRADED)
				degraded++;

			if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA)
				corrupted++;
		}

		vd->vdev_ops->vdev_op_state_change(vd, faulted, degraded);

		/*
		 * Root special: if there is a toplevel vdev that cannot be
		 * opened due to corrupted metadata, then propagate the root
		 * vdev's aux state as 'corrupt' rather than 'insufficient
		 * replicas'.
		 */
		if (corrupted && vd == rvd &&
		    rvd->vdev_state == VDEV_STATE_CANT_OPEN)
			vdev_set_state(rvd, B_FALSE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
	}

	/*
	 * Walk up toward the root; log (slog) vdevs do not contribute to
	 * their parent's state.
	 */
	if (vd->vdev_parent && !vd->vdev_islog)
		vdev_propagate_state(vd->vdev_parent);
}
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
2133 |
|
/*
 * Set a vdev's state.  If this is during an open, we don't update the parent
 * state, because we're in the process of opening children depth-first.
 * Otherwise, we propagate the change to the parent.
 *
 * If this routine places a device in a faulted state, an appropriate ereport is
 * generated.
 */
void
vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
{
	uint64_t save_state;
	spa_t *spa = vd->vdev_spa;

	if (state == vd->vdev_state) {
		/* No state change: just refresh the aux reason and return. */
		vd->vdev_stat.vs_aux = aux;
		return;
	}

	/* Remember the prior state for the ereport posted below. */
	save_state = vd->vdev_state;

	vd->vdev_state = state;
	vd->vdev_stat.vs_aux = aux;

	/*
	 * If we are setting the vdev state to anything but an open state, then
	 * always close the underlying device.  Otherwise, we keep accessible
	 * but invalid devices open forever.  We don't call vdev_close() itself,
	 * because that implies some extra checks (offline, etc) that we don't
	 * want here.  This is limited to leaf devices, because otherwise
	 * closing the device will affect other children.
	 */
	if (!vdev_readable(vd) && vd->vdev_ops->vdev_op_leaf)
		vd->vdev_ops->vdev_op_close(vd);

	if (vd->vdev_removed &&
	    state == VDEV_STATE_CANT_OPEN &&
	    (aux == VDEV_AUX_OPEN_FAILED || vd->vdev_checkremove)) {
		/*
		 * If the previous state is set to VDEV_STATE_REMOVED, then this
		 * device was previously marked removed and someone attempted to
		 * reopen it.  If this failed due to a nonexistent device, then
		 * keep the device in the REMOVED state.  We also let this be if
		 * it is one of our special test online cases, which is only
		 * attempting to online the device and shouldn't generate an FMA
		 * fault.
		 */
		vd->vdev_state = VDEV_STATE_REMOVED;
		vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
	} else if (state == VDEV_STATE_REMOVED) {
		/*
		 * Indicate to the ZFS DE that this device has been removed, and
		 * any recent errors should be ignored.
		 */
		zfs_post_remove(spa, vd);
		vd->vdev_removed = B_TRUE;
	} else if (state == VDEV_STATE_CANT_OPEN) {
		/*
		 * If we fail to open a vdev during an import, we mark it as
		 * "not available", which signifies that it was never there to
		 * begin with.  Failure to open such a device is not considered
		 * an error.
		 */
		if (spa->spa_load_state == SPA_LOAD_IMPORT &&
		    !spa->spa_import_faulted &&
		    vd->vdev_ops->vdev_op_leaf)
			vd->vdev_not_present = 1;

		/*
		 * Post the appropriate ereport.  If the 'prevstate' field is
		 * set to something other than VDEV_STATE_UNKNOWN, it indicates
		 * that this is part of a vdev_reopen().  In this case, we don't
		 * want to post the ereport if the device was already in the
		 * CANT_OPEN state beforehand.
		 *
		 * If the 'checkremove' flag is set, then this is an attempt to
		 * online the device in response to an insertion event.  If we
		 * hit this case, then we have detected an insertion event for a
		 * faulted or offline device that wasn't in the removed state.
		 * In this scenario, we don't post an ereport because we are
		 * about to replace the device, or attempt an online with
		 * vdev_forcefault, which will generate the fault for us.
		 */
		if ((vd->vdev_prevstate != state || vd->vdev_forcefault) &&
		    !vd->vdev_not_present && !vd->vdev_checkremove &&
		    vd != spa->spa_root_vdev) {
			const char *class;

			/* Map the aux failure reason to an FMA ereport class. */
			switch (aux) {
			case VDEV_AUX_OPEN_FAILED:
				class = FM_EREPORT_ZFS_DEVICE_OPEN_FAILED;
				break;
			case VDEV_AUX_CORRUPT_DATA:
				class = FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA;
				break;
			case VDEV_AUX_NO_REPLICAS:
				class = FM_EREPORT_ZFS_DEVICE_NO_REPLICAS;
				break;
			case VDEV_AUX_BAD_GUID_SUM:
				class = FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM;
				break;
			case VDEV_AUX_TOO_SMALL:
				class = FM_EREPORT_ZFS_DEVICE_TOO_SMALL;
				break;
			case VDEV_AUX_BAD_LABEL:
				class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL;
				break;
			default:
				class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
			}

			zfs_ereport_post(class, spa, vd, NULL, save_state, 0);
		}

		/* Erase any notion of persistent removed state */
		vd->vdev_removed = B_FALSE;
	} else {
		/* Any other (healthy) state also clears the removed flag. */
		vd->vdev_removed = B_FALSE;
	}

	if (!isopen)
		vdev_propagate_state(vd);
}