author | eschrock |
Mon, 12 Jun 2006 08:20:57 -0700 | |
changeset 2174 | 73de7a781492 |
parent 2082 | 76b439ec3ac1 |
child 2237 | 45affe88ed99 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1485 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
2082 | 21 |
|
789 | 22 |
/* |
1199 | 23 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
#include <sys/zfs_context.h> |
|
1544 | 30 |
#include <sys/fm/fs/zfs.h> |
789 | 31 |
#include <sys/spa.h> |
32 |
#include <sys/spa_impl.h> |
|
33 |
#include <sys/dmu.h> |
|
34 |
#include <sys/dmu_tx.h> |
|
35 |
#include <sys/vdev_impl.h> |
|
36 |
#include <sys/uberblock_impl.h> |
|
37 |
#include <sys/metaslab.h> |
|
38 |
#include <sys/metaslab_impl.h> |
|
39 |
#include <sys/space_map.h> |
|
40 |
#include <sys/zio.h> |
|
41 |
#include <sys/zap.h> |
|
42 |
#include <sys/fs/zfs.h> |
|
43 |
||
44 |
/* |
|
45 |
* Virtual device management. |
|
46 |
*/ |
|
47 |
||
48 |
static vdev_ops_t *vdev_ops_table[] = { |
|
49 |
&vdev_root_ops, |
|
50 |
&vdev_raidz_ops, |
|
51 |
&vdev_mirror_ops, |
|
52 |
&vdev_replacing_ops, |
|
2082 | 53 |
&vdev_spare_ops, |
789 | 54 |
&vdev_disk_ops, |
55 |
&vdev_file_ops, |
|
56 |
&vdev_missing_ops, |
|
57 |
NULL |
|
58 |
}; |
|
59 |
||
60 |
/* |
|
61 |
* Given a vdev type, return the appropriate ops vector. |
|
62 |
*/ |
|
63 |
static vdev_ops_t * |
|
64 |
vdev_getops(const char *type) |
|
65 |
{ |
|
66 |
vdev_ops_t *ops, **opspp; |
|
67 |
||
68 |
for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) |
|
69 |
if (strcmp(ops->vdev_op_type, type) == 0) |
|
70 |
break; |
|
71 |
||
72 |
return (ops); |
|
73 |
} |
|
74 |
||
75 |
/* |
|
76 |
* Default asize function: return the MAX of psize with the asize of |
|
77 |
* all children. This is what's used by anything other than RAID-Z. |
|
78 |
*/ |
|
79 |
uint64_t |
|
80 |
vdev_default_asize(vdev_t *vd, uint64_t psize) |
|
81 |
{ |
|
1732 | 82 |
uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift); |
789 | 83 |
uint64_t csize; |
84 |
uint64_t c; |
|
85 |
||
86 |
for (c = 0; c < vd->vdev_children; c++) { |
|
87 |
csize = vdev_psize_to_asize(vd->vdev_child[c], psize); |
|
88 |
asize = MAX(asize, csize); |
|
89 |
} |
|
90 |
||
91 |
return (asize); |
|
92 |
} |
|
93 |
||
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
94 |
/* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
95 |
* Get the replaceable or attachable device size. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
96 |
* If the parent is a mirror or raidz, the replaceable size is the minimum |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
97 |
* psize of all its children. For the rest, just return our own psize. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
98 |
* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
99 |
* e.g. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
100 |
* psize rsize |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
101 |
* root - - |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
102 |
* mirror/raidz - - |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
103 |
* disk1 20g 20g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
104 |
* disk2 40g 20g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
105 |
* disk3 80g 80g |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
106 |
*/ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
107 |
uint64_t |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
108 |
vdev_get_rsize(vdev_t *vd) |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
109 |
{ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
110 |
vdev_t *pvd, *cvd; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
111 |
uint64_t c, rsize; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
112 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
113 |
pvd = vd->vdev_parent; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
114 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
115 |
/* |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
116 |
* If our parent is NULL or the root, just return our own psize. |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
117 |
*/ |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
118 |
if (pvd == NULL || pvd->vdev_parent == NULL) |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
119 |
return (vd->vdev_psize); |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
120 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
121 |
rsize = 0; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
122 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
123 |
for (c = 0; c < pvd->vdev_children; c++) { |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
124 |
cvd = pvd->vdev_child[c]; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
125 |
rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1; |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
126 |
} |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
127 |
|
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
128 |
return (rsize); |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
129 |
} |
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
130 |
|
789 | 131 |
vdev_t * |
132 |
vdev_lookup_top(spa_t *spa, uint64_t vdev) |
|
133 |
{ |
|
134 |
vdev_t *rvd = spa->spa_root_vdev; |
|
135 |
||
136 |
if (vdev < rvd->vdev_children) |
|
137 |
return (rvd->vdev_child[vdev]); |
|
138 |
||
139 |
return (NULL); |
|
140 |
} |
|
141 |
||
142 |
/*
 * Depth-first search of the vdev tree rooted at vd for a vdev whose
 * vdev_guid equals 'guid'.  Returns the first match (vd itself is checked
 * before its children), or NULL if there is none.
 */
vdev_t *
vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
{
	int i;

	if (vd->vdev_guid == guid)
		return (vd);

	for (i = 0; i < vd->vdev_children; i++) {
		vdev_t *found = vdev_lookup_by_guid(vd->vdev_child[i], guid);

		if (found != NULL)
			return (found);
	}

	return (NULL);
}
|
158 |
||
159 |
void |
|
160 |
vdev_add_child(vdev_t *pvd, vdev_t *cvd) |
|
161 |
{ |
|
162 |
size_t oldsize, newsize; |
|
163 |
uint64_t id = cvd->vdev_id; |
|
164 |
vdev_t **newchild; |
|
165 |
||
166 |
ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); |
|
167 |
ASSERT(cvd->vdev_parent == NULL); |
|
168 |
||
169 |
cvd->vdev_parent = pvd; |
|
170 |
||
171 |
if (pvd == NULL) |
|
172 |
return; |
|
173 |
||
174 |
ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL); |
|
175 |
||
176 |
oldsize = pvd->vdev_children * sizeof (vdev_t *); |
|
177 |
pvd->vdev_children = MAX(pvd->vdev_children, id + 1); |
|
178 |
newsize = pvd->vdev_children * sizeof (vdev_t *); |
|
179 |
||
180 |
newchild = kmem_zalloc(newsize, KM_SLEEP); |
|
181 |
if (pvd->vdev_child != NULL) { |
|
182 |
bcopy(pvd->vdev_child, newchild, oldsize); |
|
183 |
kmem_free(pvd->vdev_child, oldsize); |
|
184 |
} |
|
185 |
||
186 |
pvd->vdev_child = newchild; |
|
187 |
pvd->vdev_child[id] = cvd; |
|
188 |
||
189 |
cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); |
|
190 |
ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); |
|
191 |
||
192 |
/* |
|
193 |
* Walk up all ancestors to update guid sum. |
|
194 |
*/ |
|
195 |
for (; pvd != NULL; pvd = pvd->vdev_parent) |
|
196 |
pvd->vdev_guid_sum += cvd->vdev_guid_sum; |
|
197 |
} |
|
198 |
||
199 |
void |
|
200 |
vdev_remove_child(vdev_t *pvd, vdev_t *cvd) |
|
201 |
{ |
|
202 |
int c; |
|
203 |
uint_t id = cvd->vdev_id; |
|
204 |
||
205 |
ASSERT(cvd->vdev_parent == pvd); |
|
206 |
||
207 |
if (pvd == NULL) |
|
208 |
return; |
|
209 |
||
210 |
ASSERT(id < pvd->vdev_children); |
|
211 |
ASSERT(pvd->vdev_child[id] == cvd); |
|
212 |
||
213 |
pvd->vdev_child[id] = NULL; |
|
214 |
cvd->vdev_parent = NULL; |
|
215 |
||
216 |
for (c = 0; c < pvd->vdev_children; c++) |
|
217 |
if (pvd->vdev_child[c]) |
|
218 |
break; |
|
219 |
||
220 |
if (c == pvd->vdev_children) { |
|
221 |
kmem_free(pvd->vdev_child, c * sizeof (vdev_t *)); |
|
222 |
pvd->vdev_child = NULL; |
|
223 |
pvd->vdev_children = 0; |
|
224 |
} |
|
225 |
||
226 |
/* |
|
227 |
* Walk up all ancestors to update guid sum. |
|
228 |
*/ |
|
229 |
for (; pvd != NULL; pvd = pvd->vdev_parent) |
|
230 |
pvd->vdev_guid_sum -= cvd->vdev_guid_sum; |
|
231 |
} |
|
232 |
||
233 |
/* |
|
234 |
* Remove any holes in the child array. |
|
235 |
*/ |
|
236 |
void |
|
237 |
vdev_compact_children(vdev_t *pvd) |
|
238 |
{ |
|
239 |
vdev_t **newchild, *cvd; |
|
240 |
int oldc = pvd->vdev_children; |
|
241 |
int newc, c; |
|
242 |
||
243 |
ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER)); |
|
244 |
||
245 |
for (c = newc = 0; c < oldc; c++) |
|
246 |
if (pvd->vdev_child[c]) |
|
247 |
newc++; |
|
248 |
||
249 |
newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP); |
|
250 |
||
251 |
for (c = newc = 0; c < oldc; c++) { |
|
252 |
if ((cvd = pvd->vdev_child[c]) != NULL) { |
|
253 |
newchild[newc] = cvd; |
|
254 |
cvd->vdev_id = newc++; |
|
255 |
} |
|
256 |
} |
|
257 |
||
258 |
kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *)); |
|
259 |
pvd->vdev_child = newchild; |
|
260 |
pvd->vdev_children = newc; |
|
261 |
} |
|
262 |
||
263 |
/* |
|
264 |
* Allocate and minimally initialize a vdev_t. |
|
265 |
*/ |
|
266 |
static vdev_t * |
|
267 |
vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) |
|
268 |
{ |
|
269 |
vdev_t *vd; |
|
270 |
||
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
271 |
vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
272 |
|
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
273 |
if (spa->spa_root_vdev == NULL) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
274 |
ASSERT(ops == &vdev_root_ops); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
275 |
spa->spa_root_vdev = vd; |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
276 |
} |
789 | 277 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
278 |
if (guid == 0) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
279 |
if (spa->spa_root_vdev == vd) { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
280 |
/* |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
281 |
* The root vdev's guid will also be the pool guid, |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
282 |
* which must be unique among all pools. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
283 |
*/ |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
284 |
while (guid == 0 || spa_guid_exists(guid, 0)) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
285 |
guid = spa_get_random(-1ULL); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
286 |
} else { |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
287 |
/* |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
288 |
* Any other vdev's guid must be unique within the pool. |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
289 |
*/ |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
290 |
while (guid == 0 || |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
291 |
spa_guid_exists(spa_guid(spa), guid)) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
292 |
guid = spa_get_random(-1ULL); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
293 |
} |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
294 |
ASSERT(!spa_guid_exists(spa_guid(spa), guid)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
295 |
} |
789 | 296 |
|
297 |
vd->vdev_spa = spa; |
|
298 |
vd->vdev_id = id; |
|
299 |
vd->vdev_guid = guid; |
|
300 |
vd->vdev_guid_sum = guid; |
|
301 |
vd->vdev_ops = ops; |
|
302 |
vd->vdev_state = VDEV_STATE_CLOSED; |
|
303 |
||
304 |
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); |
|
305 |
space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock); |
|
306 |
space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock); |
|
307 |
txg_list_create(&vd->vdev_ms_list, |
|
308 |
offsetof(struct metaslab, ms_txg_node)); |
|
309 |
txg_list_create(&vd->vdev_dtl_list, |
|
310 |
offsetof(struct vdev, vdev_dtl_node)); |
|
311 |
vd->vdev_stat.vs_timestamp = gethrtime(); |
|
312 |
||
313 |
return (vd); |
|
314 |
} |
|
315 |
||
316 |
/* |
|
317 |
* Free a vdev_t that has been removed from service. |
|
318 |
*/ |
|
319 |
static void |
|
320 |
vdev_free_common(vdev_t *vd) |
|
321 |
{ |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
322 |
spa_t *spa = vd->vdev_spa; |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
323 |
|
789 | 324 |
if (vd->vdev_path) |
325 |
spa_strfree(vd->vdev_path); |
|
326 |
if (vd->vdev_devid) |
|
327 |
spa_strfree(vd->vdev_devid); |
|
328 |
||
2082 | 329 |
if (vd->vdev_isspare) |
330 |
spa_spare_remove(vd->vdev_guid); |
|
331 |
||
789 | 332 |
txg_list_destroy(&vd->vdev_ms_list); |
333 |
txg_list_destroy(&vd->vdev_dtl_list); |
|
334 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1732 | 335 |
space_map_unload(&vd->vdev_dtl_map); |
789 | 336 |
space_map_destroy(&vd->vdev_dtl_map); |
337 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
338 |
space_map_destroy(&vd->vdev_dtl_scrub); |
|
339 |
mutex_exit(&vd->vdev_dtl_lock); |
|
340 |
mutex_destroy(&vd->vdev_dtl_lock); |
|
341 |
||
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
342 |
if (vd == spa->spa_root_vdev) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
343 |
spa->spa_root_vdev = NULL; |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
344 |
|
789 | 345 |
kmem_free(vd, sizeof (vdev_t)); |
346 |
} |
|
347 |
||
348 |
/* |
|
349 |
* Allocate a new vdev. The 'alloctype' is used to control whether we are |
|
350 |
* creating a new vdev or loading an existing one - the behavior is slightly |
|
351 |
* different for each case. |
|
352 |
*/ |
|
2082 | 353 |
int |
354 |
vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, |
|
355 |
int alloctype) |
|
789 | 356 |
{ |
357 |
vdev_ops_t *ops; |
|
358 |
char *type; |
|
1732 | 359 |
uint64_t guid = 0; |
789 | 360 |
vdev_t *vd; |
361 |
||
362 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
|
363 |
||
364 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) |
|
2082 | 365 |
return (EINVAL); |
789 | 366 |
|
367 |
if ((ops = vdev_getops(type)) == NULL) |
|
2082 | 368 |
return (EINVAL); |
789 | 369 |
|
370 |
/* |
|
371 |
* If this is a load, get the vdev guid from the nvlist. |
|
372 |
* Otherwise, vdev_alloc_common() will generate one for us. |
|
373 |
*/ |
|
374 |
if (alloctype == VDEV_ALLOC_LOAD) { |
|
375 |
uint64_t label_id; |
|
376 |
||
377 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || |
|
378 |
label_id != id) |
|
2082 | 379 |
return (EINVAL); |
789 | 380 |
|
381 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) |
|
2082 | 382 |
return (EINVAL); |
383 |
} else if (alloctype == VDEV_ALLOC_SPARE) { |
|
384 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) |
|
385 |
return (EINVAL); |
|
789 | 386 |
} |
387 |
||
2082 | 388 |
/* |
389 |
* The first allocated vdev must be of type 'root'. |
|
390 |
*/ |
|
391 |
if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) |
|
392 |
return (EINVAL); |
|
393 |
||
789 | 394 |
vd = vdev_alloc_common(spa, id, guid, ops); |
395 |
||
396 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) |
|
397 |
vd->vdev_path = spa_strdup(vd->vdev_path); |
|
398 |
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) |
|
399 |
vd->vdev_devid = spa_strdup(vd->vdev_devid); |
|
400 |
||
401 |
/* |
|
2082 | 402 |
* Set the nparity propery for RAID-Z vdevs. |
403 |
*/ |
|
404 |
if (ops == &vdev_raidz_ops) { |
|
405 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, |
|
406 |
&vd->vdev_nparity) == 0) { |
|
407 |
/* |
|
408 |
* Currently, we can only support 2 parity devices. |
|
409 |
*/ |
|
410 |
if (vd->vdev_nparity > 2) |
|
411 |
return (EINVAL); |
|
412 |
/* |
|
413 |
* Older versions can only support 1 parity device. |
|
414 |
*/ |
|
415 |
if (vd->vdev_nparity == 2 && |
|
416 |
spa_version(spa) < ZFS_VERSION_RAID6) |
|
417 |
return (ENOTSUP); |
|
418 |
||
419 |
} else { |
|
420 |
/* |
|
421 |
* We require the parity to be specified for SPAs that |
|
422 |
* support multiple parity levels. |
|
423 |
*/ |
|
424 |
if (spa_version(spa) >= ZFS_VERSION_RAID6) |
|
425 |
return (EINVAL); |
|
426 |
||
427 |
/* |
|
428 |
* Otherwise, we default to 1 parity device for RAID-Z. |
|
429 |
*/ |
|
430 |
vd->vdev_nparity = 1; |
|
431 |
} |
|
432 |
} else { |
|
433 |
vd->vdev_nparity = 0; |
|
434 |
} |
|
435 |
||
436 |
/* |
|
1171 | 437 |
* Set the whole_disk property. If it's not specified, leave the value |
438 |
* as -1. |
|
439 |
*/ |
|
440 |
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, |
|
441 |
&vd->vdev_wholedisk) != 0) |
|
442 |
vd->vdev_wholedisk = -1ULL; |
|
443 |
||
444 |
/* |
|
1544 | 445 |
* Look for the 'not present' flag. This will only be set if the device |
446 |
* was not present at the time of import. |
|
447 |
*/ |
|
448 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, |
|
449 |
&vd->vdev_not_present); |
|
450 |
||
451 |
/* |
|
1732 | 452 |
* Get the alignment requirement. |
453 |
*/ |
|
454 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); |
|
455 |
||
456 |
/* |
|
2082 | 457 |
* Look for the 'is_spare' flag. If this is the case, then we are a |
458 |
* repurposed hot spare. |
|
459 |
*/ |
|
460 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, |
|
461 |
&vd->vdev_isspare); |
|
462 |
if (vd->vdev_isspare) |
|
463 |
spa_spare_add(vd->vdev_guid); |
|
464 |
||
465 |
/* |
|
789 | 466 |
* If we're a top-level vdev, try to load the allocation parameters. |
467 |
*/ |
|
468 |
if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { |
|
469 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, |
|
470 |
&vd->vdev_ms_array); |
|
471 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, |
|
472 |
&vd->vdev_ms_shift); |
|
473 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, |
|
474 |
&vd->vdev_asize); |
|
475 |
} |
|
476 |
||
477 |
/* |
|
1732 | 478 |
* If we're a leaf vdev, try to load the DTL object and offline state. |
789 | 479 |
*/ |
480 |
if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) { |
|
481 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, |
|
482 |
&vd->vdev_dtl.smo_object); |
|
1732 | 483 |
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, |
484 |
&vd->vdev_offline); |
|
789 | 485 |
} |
486 |
||
487 |
/* |
|
488 |
* Add ourselves to the parent's list of children. |
|
489 |
*/ |
|
490 |
vdev_add_child(parent, vd); |
|
491 |
||
2082 | 492 |
*vdp = vd; |
493 |
||
494 |
return (0); |
|
789 | 495 |
} |
496 |
||
497 |
void |
|
498 |
vdev_free(vdev_t *vd) |
|
499 |
{ |
|
500 |
int c; |
|
501 |
||
502 |
/* |
|
503 |
* vdev_free() implies closing the vdev first. This is simpler than |
|
504 |
* trying to ensure complicated semantics for all callers. |
|
505 |
*/ |
|
506 |
vdev_close(vd); |
|
507 |
||
1732 | 508 |
ASSERT(!list_link_active(&vd->vdev_dirty_node)); |
789 | 509 |
|
510 |
/* |
|
511 |
* Free all children. |
|
512 |
*/ |
|
513 |
for (c = 0; c < vd->vdev_children; c++) |
|
514 |
vdev_free(vd->vdev_child[c]); |
|
515 |
||
516 |
ASSERT(vd->vdev_child == NULL); |
|
517 |
ASSERT(vd->vdev_guid_sum == vd->vdev_guid); |
|
518 |
||
519 |
/* |
|
520 |
* Discard allocation state. |
|
521 |
*/ |
|
522 |
if (vd == vd->vdev_top) |
|
523 |
vdev_metaslab_fini(vd); |
|
524 |
||
525 |
ASSERT3U(vd->vdev_stat.vs_space, ==, 0); |
|
2082 | 526 |
ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0); |
789 | 527 |
ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); |
528 |
||
529 |
/* |
|
530 |
* Remove this vdev from its parent's child list. |
|
531 |
*/ |
|
532 |
vdev_remove_child(vd->vdev_parent, vd); |
|
533 |
||
534 |
ASSERT(vd->vdev_parent == NULL); |
|
535 |
||
536 |
vdev_free_common(vd); |
|
537 |
} |
|
538 |
||
539 |
/* |
|
540 |
* Transfer top-level vdev state from svd to tvd. |
|
541 |
*/ |
|
542 |
static void |
|
543 |
vdev_top_transfer(vdev_t *svd, vdev_t *tvd) |
|
544 |
{ |
|
545 |
spa_t *spa = svd->vdev_spa; |
|
546 |
metaslab_t *msp; |
|
547 |
vdev_t *vd; |
|
548 |
int t; |
|
549 |
||
550 |
ASSERT(tvd == tvd->vdev_top); |
|
551 |
||
552 |
tvd->vdev_ms_array = svd->vdev_ms_array; |
|
553 |
tvd->vdev_ms_shift = svd->vdev_ms_shift; |
|
554 |
tvd->vdev_ms_count = svd->vdev_ms_count; |
|
555 |
||
556 |
svd->vdev_ms_array = 0; |
|
557 |
svd->vdev_ms_shift = 0; |
|
558 |
svd->vdev_ms_count = 0; |
|
559 |
||
560 |
tvd->vdev_mg = svd->vdev_mg; |
|
561 |
tvd->vdev_ms = svd->vdev_ms; |
|
562 |
||
563 |
svd->vdev_mg = NULL; |
|
564 |
svd->vdev_ms = NULL; |
|
1732 | 565 |
|
566 |
if (tvd->vdev_mg != NULL) |
|
567 |
tvd->vdev_mg->mg_vd = tvd; |
|
789 | 568 |
|
569 |
tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; |
|
570 |
tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; |
|
2082 | 571 |
tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace; |
789 | 572 |
|
573 |
svd->vdev_stat.vs_alloc = 0; |
|
574 |
svd->vdev_stat.vs_space = 0; |
|
2082 | 575 |
svd->vdev_stat.vs_dspace = 0; |
789 | 576 |
|
577 |
for (t = 0; t < TXG_SIZE; t++) { |
|
578 |
while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) |
|
579 |
(void) txg_list_add(&tvd->vdev_ms_list, msp, t); |
|
580 |
while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL) |
|
581 |
(void) txg_list_add(&tvd->vdev_dtl_list, vd, t); |
|
582 |
if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t)) |
|
583 |
(void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t); |
|
584 |
} |
|
585 |
||
1732 | 586 |
if (list_link_active(&svd->vdev_dirty_node)) { |
789 | 587 |
vdev_config_clean(svd); |
588 |
vdev_config_dirty(tvd); |
|
589 |
} |
|
590 |
||
1544 | 591 |
tvd->vdev_reopen_wanted = svd->vdev_reopen_wanted; |
592 |
svd->vdev_reopen_wanted = 0; |
|
2082 | 593 |
|
594 |
tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio; |
|
595 |
svd->vdev_deflate_ratio = 0; |
|
789 | 596 |
} |
597 |
||
598 |
static void |
|
599 |
vdev_top_update(vdev_t *tvd, vdev_t *vd) |
|
600 |
{ |
|
601 |
int c; |
|
602 |
||
603 |
if (vd == NULL) |
|
604 |
return; |
|
605 |
||
606 |
vd->vdev_top = tvd; |
|
607 |
||
608 |
for (c = 0; c < vd->vdev_children; c++) |
|
609 |
vdev_top_update(tvd, vd->vdev_child[c]); |
|
610 |
} |
|
611 |
||
612 |
/* |
|
613 |
* Add a mirror/replacing vdev above an existing vdev. |
|
614 |
*/ |
|
615 |
vdev_t * |
|
616 |
vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) |
|
617 |
{ |
|
618 |
spa_t *spa = cvd->vdev_spa; |
|
619 |
vdev_t *pvd = cvd->vdev_parent; |
|
620 |
vdev_t *mvd; |
|
621 |
||
622 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
|
623 |
||
624 |
mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); |
|
1732 | 625 |
|
626 |
mvd->vdev_asize = cvd->vdev_asize; |
|
627 |
mvd->vdev_ashift = cvd->vdev_ashift; |
|
628 |
mvd->vdev_state = cvd->vdev_state; |
|
629 |
||
789 | 630 |
vdev_remove_child(pvd, cvd); |
631 |
vdev_add_child(pvd, mvd); |
|
632 |
cvd->vdev_id = mvd->vdev_children; |
|
633 |
vdev_add_child(mvd, cvd); |
|
634 |
vdev_top_update(cvd->vdev_top, cvd->vdev_top); |
|
635 |
||
636 |
if (mvd == mvd->vdev_top) |
|
637 |
vdev_top_transfer(cvd, mvd); |
|
638 |
||
639 |
return (mvd); |
|
640 |
} |
|
641 |
||
642 |
/* |
|
643 |
* Remove a 1-way mirror/replacing vdev from the tree. |
|
644 |
*/ |
|
645 |
void |
|
646 |
vdev_remove_parent(vdev_t *cvd) |
|
647 |
{ |
|
648 |
vdev_t *mvd = cvd->vdev_parent; |
|
649 |
vdev_t *pvd = mvd->vdev_parent; |
|
650 |
||
651 |
ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER)); |
|
652 |
||
653 |
ASSERT(mvd->vdev_children == 1); |
|
654 |
ASSERT(mvd->vdev_ops == &vdev_mirror_ops || |
|
2082 | 655 |
mvd->vdev_ops == &vdev_replacing_ops || |
656 |
mvd->vdev_ops == &vdev_spare_ops); |
|
1732 | 657 |
cvd->vdev_ashift = mvd->vdev_ashift; |
789 | 658 |
|
659 |
vdev_remove_child(mvd, cvd); |
|
660 |
vdev_remove_child(pvd, mvd); |
|
661 |
cvd->vdev_id = mvd->vdev_id; |
|
662 |
vdev_add_child(pvd, cvd); |
|
2082 | 663 |
/* |
664 |
* If we created a new toplevel vdev, then we need to change the child's |
|
665 |
* vdev GUID to match the old toplevel vdev. Otherwise, we could have |
|
666 |
* detached an offline device, and when we go to import the pool we'll |
|
667 |
* think we have two toplevel vdevs, instead of a different version of |
|
668 |
* the same toplevel vdev. |
|
669 |
*/ |
|
670 |
if (cvd->vdev_top == cvd) { |
|
671 |
pvd->vdev_guid_sum -= cvd->vdev_guid; |
|
672 |
cvd->vdev_guid_sum -= cvd->vdev_guid; |
|
673 |
cvd->vdev_guid = mvd->vdev_guid; |
|
674 |
cvd->vdev_guid_sum += mvd->vdev_guid; |
|
675 |
pvd->vdev_guid_sum += cvd->vdev_guid; |
|
676 |
} |
|
789 | 677 |
vdev_top_update(cvd->vdev_top, cvd->vdev_top); |
678 |
||
679 |
if (cvd == cvd->vdev_top) |
|
680 |
vdev_top_transfer(mvd, cvd); |
|
681 |
||
682 |
ASSERT(mvd->vdev_children == 0); |
|
683 |
vdev_free(mvd); |
|
684 |
} |
|
685 |
||
1544 | 686 |
int |
789 | 687 |
vdev_metaslab_init(vdev_t *vd, uint64_t txg) |
688 |
{ |
|
689 |
spa_t *spa = vd->vdev_spa; |
|
1732 | 690 |
objset_t *mos = spa->spa_meta_objset; |
789 | 691 |
metaslab_class_t *mc = spa_metaslab_class_select(spa); |
1732 | 692 |
uint64_t m; |
789 | 693 |
uint64_t oldc = vd->vdev_ms_count; |
694 |
uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; |
|
1732 | 695 |
metaslab_t **mspp; |
696 |
int error; |
|
789 | 697 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
698 |
if (vd->vdev_ms_shift == 0) /* not being allocated from yet */ |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
699 |
return (0); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
700 |
|
789 | 701 |
dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc); |
702 |
||
703 |
ASSERT(oldc <= newc); |
|
704 |
||
1732 | 705 |
if (vd->vdev_mg == NULL) |
706 |
vd->vdev_mg = metaslab_group_create(mc, vd); |
|
707 |
||
708 |
mspp = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP); |
|
709 |
||
710 |
if (oldc != 0) { |
|
711 |
bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); |
|
712 |
kmem_free(vd->vdev_ms, oldc * sizeof (*mspp)); |
|
713 |
} |
|
714 |
||
715 |
vd->vdev_ms = mspp; |
|
789 | 716 |
vd->vdev_ms_count = newc; |
717 |
||
1732 | 718 |
for (m = oldc; m < newc; m++) { |
719 |
space_map_obj_t smo = { 0, 0, 0 }; |
|
789 | 720 |
if (txg == 0) { |
1732 | 721 |
uint64_t object = 0; |
722 |
error = dmu_read(mos, vd->vdev_ms_array, |
|
723 |
m * sizeof (uint64_t), sizeof (uint64_t), &object); |
|
724 |
if (error) |
|
725 |
return (error); |
|
726 |
if (object != 0) { |
|
727 |
dmu_buf_t *db; |
|
728 |
error = dmu_bonus_hold(mos, object, FTAG, &db); |
|
729 |
if (error) |
|
730 |
return (error); |
|
731 |
ASSERT3U(db->db_size, ==, sizeof (smo)); |
|
732 |
bcopy(db->db_data, &smo, db->db_size); |
|
733 |
ASSERT3U(smo.smo_object, ==, object); |
|
1544 | 734 |
dmu_buf_rele(db, FTAG); |
789 | 735 |
} |
736 |
} |
|
1732 | 737 |
vd->vdev_ms[m] = metaslab_init(vd->vdev_mg, &smo, |
738 |
m << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg); |
|
789 | 739 |
} |
740 |
||
1544 | 741 |
return (0); |
789 | 742 |
} |
743 |
||
744 |
void |
|
745 |
vdev_metaslab_fini(vdev_t *vd) |
|
746 |
{ |
|
747 |
uint64_t m; |
|
748 |
uint64_t count = vd->vdev_ms_count; |
|
749 |
||
750 |
if (vd->vdev_ms != NULL) { |
|
751 |
for (m = 0; m < count; m++) |
|
1732 | 752 |
if (vd->vdev_ms[m] != NULL) |
753 |
metaslab_fini(vd->vdev_ms[m]); |
|
789 | 754 |
kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); |
755 |
vd->vdev_ms = NULL; |
|
756 |
} |
|
757 |
} |
|
758 |
||
759 |
/* |
|
760 |
* Prepare a virtual device for access. |
|
761 |
*/ |
|
762 |
int |
|
763 |
vdev_open(vdev_t *vd) |
|
764 |
{ |
|
765 |
int error; |
|
766 |
vdev_knob_t *vk; |
|
767 |
int c; |
|
768 |
uint64_t osize = 0; |
|
769 |
uint64_t asize, psize; |
|
1732 | 770 |
uint64_t ashift = 0; |
789 | 771 |
|
772 |
ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || |
|
773 |
vd->vdev_state == VDEV_STATE_CANT_OPEN || |
|
774 |
vd->vdev_state == VDEV_STATE_OFFLINE); |
|
775 |
||
776 |
if (vd->vdev_fault_mode == VDEV_FAULT_COUNT) |
|
777 |
vd->vdev_fault_arg >>= 1; |
|
778 |
else |
|
779 |
vd->vdev_fault_mode = VDEV_FAULT_NONE; |
|
780 |
||
781 |
vd->vdev_stat.vs_aux = VDEV_AUX_NONE; |
|
782 |
||
783 |
for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { |
|
784 |
uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset); |
|
785 |
||
786 |
*valp = vk->vk_default; |
|
787 |
*valp = MAX(*valp, vk->vk_min); |
|
788 |
*valp = MIN(*valp, vk->vk_max); |
|
789 |
} |
|
790 |
||
791 |
if (vd->vdev_ops->vdev_op_leaf) { |
|
792 |
vdev_cache_init(vd); |
|
793 |
vdev_queue_init(vd); |
|
794 |
vd->vdev_cache_active = B_TRUE; |
|
795 |
} |
|
796 |
||
797 |
if (vd->vdev_offline) { |
|
798 |
ASSERT(vd->vdev_children == 0); |
|
1544 | 799 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); |
789 | 800 |
return (ENXIO); |
801 |
} |
|
802 |
||
803 |
error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); |
|
804 |
||
1544 | 805 |
if (zio_injection_enabled && error == 0) |
806 |
error = zio_handle_device_injection(vd, ENXIO); |
|
807 |
||
789 | 808 |
dprintf("%s = %d, osize %llu, state = %d\n", |
809 |
vdev_description(vd), error, osize, vd->vdev_state); |
|
810 |
||
811 |
if (error) { |
|
1544 | 812 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
789 | 813 |
vd->vdev_stat.vs_aux); |
814 |
return (error); |
|
815 |
} |
|
816 |
||
817 |
vd->vdev_state = VDEV_STATE_HEALTHY; |
|
818 |
||
819 |
for (c = 0; c < vd->vdev_children; c++) |
|
1544 | 820 |
if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) { |
821 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, |
|
822 |
VDEV_AUX_NONE); |
|
823 |
break; |
|
824 |
} |
|
789 | 825 |
|
826 |
osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); |
|
827 |
||
828 |
if (vd->vdev_children == 0) { |
|
829 |
if (osize < SPA_MINDEVSIZE) { |
|
1544 | 830 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
831 |
VDEV_AUX_TOO_SMALL); |
|
789 | 832 |
return (EOVERFLOW); |
833 |
} |
|
834 |
psize = osize; |
|
835 |
asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); |
|
836 |
} else { |
|
1732 | 837 |
if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE - |
789 | 838 |
(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { |
1544 | 839 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
840 |
VDEV_AUX_TOO_SMALL); |
|
789 | 841 |
return (EOVERFLOW); |
842 |
} |
|
843 |
psize = 0; |
|
844 |
asize = osize; |
|
845 |
} |
|
846 |
||
847 |
vd->vdev_psize = psize; |
|
848 |
||
849 |
if (vd->vdev_asize == 0) { |
|
850 |
/* |
|
851 |
* This is the first-ever open, so use the computed values. |
|
1732 | 852 |
* For testing purposes, a higher ashift can be requested. |
789 | 853 |
*/ |
854 |
vd->vdev_asize = asize; |
|
1732 | 855 |
vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); |
789 | 856 |
} else { |
857 |
/* |
|
858 |
* Make sure the alignment requirement hasn't increased. |
|
859 |
*/ |
|
1732 | 860 |
if (ashift > vd->vdev_top->vdev_ashift) { |
1544 | 861 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
862 |
VDEV_AUX_BAD_LABEL); |
|
789 | 863 |
return (EINVAL); |
864 |
} |
|
865 |
||
866 |
/* |
|
867 |
* Make sure the device hasn't shrunk. |
|
868 |
*/ |
|
869 |
if (asize < vd->vdev_asize) { |
|
1544 | 870 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
871 |
VDEV_AUX_BAD_LABEL); |
|
789 | 872 |
return (EINVAL); |
873 |
} |
|
874 |
||
875 |
/* |
|
876 |
* If all children are healthy and the asize has increased, |
|
877 |
* then we've experienced dynamic LUN growth. |
|
878 |
*/ |
|
879 |
if (vd->vdev_state == VDEV_STATE_HEALTHY && |
|
880 |
asize > vd->vdev_asize) { |
|
881 |
vd->vdev_asize = asize; |
|
882 |
} |
|
883 |
} |
|
884 |
||
1544 | 885 |
/* |
2082 | 886 |
* If this is a top-level vdev, compute the raidz-deflation |
887 |
* ratio. Note, we hard-code in 128k (1<<17) because it is the |
|
888 |
* current "typical" blocksize. Even if SPA_MAXBLOCKSIZE |
|
889 |
* changes, this algorithm must never change, or we will |
|
890 |
* inconsistently account for existing bp's. |
|
891 |
*/ |
|
892 |
if (vd->vdev_top == vd) { |
|
893 |
vd->vdev_deflate_ratio = (1<<17) / |
|
894 |
(vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT); |
|
895 |
} |
|
896 |
||
897 |
/* |
|
1544 | 898 |
* This allows the ZFS DE to close cases appropriately. If a device |
899 |
* goes away and later returns, we want to close the associated case. |
|
900 |
* But it's not enough to simply post this only when a device goes from |
|
901 |
* CANT_OPEN -> HEALTHY. If we reboot the system and the device is |
|
902 |
* back, we also need to close the case (otherwise we will try to replay |
|
903 |
* it). So we have to post this notifier every time. Since this only |
|
904 |
* occurs during pool open or error recovery, this should not be an |
|
905 |
* issue. |
|
906 |
*/ |
|
907 |
zfs_post_ok(vd->vdev_spa, vd); |
|
908 |
||
789 | 909 |
return (0); |
910 |
} |
|
911 |
||
912 |
/* |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
913 |
* Called once the vdevs are all opened, this routine validates the label |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
914 |
* contents. This needs to be done before vdev_load() so that we don't |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
915 |
* inadvertently do repair I/Os to the wrong device, and so that vdev_reopen() |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
916 |
* won't succeed if the device has been changed underneath. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
917 |
* |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
918 |
* This function will only return failure if one of the vdevs indicates that it |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
919 |
* has since been destroyed or exported. This is only possible if |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
920 |
* /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
921 |
* will be updated but the function will return 0. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
922 |
*/ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
923 |
int |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
924 |
vdev_validate(vdev_t *vd) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
925 |
{ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
926 |
spa_t *spa = vd->vdev_spa; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
927 |
int c; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
928 |
nvlist_t *label; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
929 |
uint64_t guid; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
930 |
uint64_t state; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
931 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
932 |
for (c = 0; c < vd->vdev_children; c++) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
933 |
if (vdev_validate(vd->vdev_child[c]) != 0) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
934 |
return (-1); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
935 |
|
2174
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
936 |
/* |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
937 |
* If the device has already failed, or was marked offline, don't do |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
938 |
* any further validation. Otherwise, label I/O will fail and we will |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
939 |
* overwrite the previous state. |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
940 |
*/ |
73de7a781492
6433717 offline devices should not be marked persistently unavailble
eschrock
parents:
2082
diff
changeset
|
941 |
if (vd->vdev_ops->vdev_op_leaf && !vdev_is_dead(vd)) { |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
942 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
943 |
if ((label = vdev_label_read_config(vd)) == NULL) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
944 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
945 |
VDEV_AUX_BAD_LABEL); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
946 |
return (0); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
947 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
948 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
949 |
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
950 |
&guid) != 0 || guid != spa_guid(spa)) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
951 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
952 |
VDEV_AUX_CORRUPT_DATA); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
953 |
nvlist_free(label); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
954 |
return (0); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
955 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
956 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
957 |
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
958 |
&guid) != 0 || guid != vd->vdev_guid) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
959 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
960 |
VDEV_AUX_CORRUPT_DATA); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
961 |
nvlist_free(label); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
962 |
return (0); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
963 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
964 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
965 |
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
966 |
&state) != 0) { |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
967 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
968 |
VDEV_AUX_CORRUPT_DATA); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
969 |
nvlist_free(label); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
970 |
return (0); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
971 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
972 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
973 |
nvlist_free(label); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
974 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
975 |
if (spa->spa_load_state == SPA_LOAD_OPEN && |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
976 |
state != POOL_STATE_ACTIVE) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
977 |
return (-1); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
978 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
979 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
980 |
/* |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
981 |
* If we were able to open and validate a vdev that was previously |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
982 |
* marked permanently unavailable, clear that state now. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
983 |
*/ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
984 |
if (vd->vdev_not_present) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
985 |
vd->vdev_not_present = 0; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
986 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
987 |
return (0); |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
988 |
} |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
989 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
990 |
/* |
789 | 991 |
* Close a virtual device. |
992 |
*/ |
|
993 |
void |
|
994 |
vdev_close(vdev_t *vd) |
|
995 |
{ |
|
996 |
vd->vdev_ops->vdev_op_close(vd); |
|
997 |
||
998 |
if (vd->vdev_cache_active) { |
|
999 |
vdev_cache_fini(vd); |
|
1000 |
vdev_queue_fini(vd); |
|
1001 |
vd->vdev_cache_active = B_FALSE; |
|
1002 |
} |
|
1003 |
||
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1004 |
/* |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1005 |
* We record the previous state before we close it, so that if we are |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1006 |
* doing a reopen(), we don't generate FMA ereports if we notice that |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1007 |
* it's still faulted. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1008 |
*/ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1009 |
vd->vdev_prevstate = vd->vdev_state; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1010 |
|
789 | 1011 |
if (vd->vdev_offline) |
1012 |
vd->vdev_state = VDEV_STATE_OFFLINE; |
|
1013 |
else |
|
1014 |
vd->vdev_state = VDEV_STATE_CLOSED; |
|
1544 | 1015 |
vd->vdev_stat.vs_aux = VDEV_AUX_NONE; |
789 | 1016 |
} |
1017 |
||
1018 |
void |
|
1544 | 1019 |
vdev_reopen(vdev_t *vd) |
789 | 1020 |
{ |
1544 | 1021 |
spa_t *spa = vd->vdev_spa; |
789 | 1022 |
|
1544 | 1023 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
1024 |
||
789 | 1025 |
vdev_close(vd); |
1026 |
(void) vdev_open(vd); |
|
1027 |
||
1028 |
/* |
|
1029 |
* Reassess root vdev's health. |
|
1030 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1031 |
vdev_propagate_state(spa->spa_root_vdev); |
789 | 1032 |
} |
1033 |
||
1034 |
int |
|
2082 | 1035 |
vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing) |
789 | 1036 |
{ |
1037 |
int error; |
|
1038 |
||
1039 |
/* |
|
1040 |
* Normally, partial opens (e.g. of a mirror) are allowed. |
|
1041 |
* For a create, however, we want to fail the request if |
|
1042 |
* there are any components we can't open. |
|
1043 |
*/ |
|
1044 |
error = vdev_open(vd); |
|
1045 |
||
1046 |
if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { |
|
1047 |
vdev_close(vd); |
|
1048 |
return (error ? error : ENXIO); |
|
1049 |
} |
|
1050 |
||
1051 |
/* |
|
1052 |
* Recursively initialize all labels. |
|
1053 |
*/ |
|
2082 | 1054 |
if ((error = vdev_label_init(vd, txg, isreplacing)) != 0) { |
789 | 1055 |
vdev_close(vd); |
1056 |
return (error); |
|
1057 |
} |
|
1058 |
||
1059 |
return (0); |
|
1060 |
} |
|
1061 |
||
1062 |
/* |
|
1063 |
* The is the latter half of vdev_create(). It is distinct because it |
|
1064 |
* involves initiating transactions in order to do metaslab creation. |
|
1065 |
* For creation, we want to try to create all vdevs at once and then undo it |
|
1066 |
* if anything fails; this is much harder if we have pending transactions. |
|
1067 |
*/ |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1068 |
void |
789 | 1069 |
vdev_init(vdev_t *vd, uint64_t txg) |
1070 |
{ |
|
1071 |
/* |
|
1072 |
* Aim for roughly 200 metaslabs per vdev. |
|
1073 |
*/ |
|
1074 |
vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); |
|
1075 |
vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); |
|
1076 |
||
1077 |
/* |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1078 |
* Initialize the vdev's metaslabs. This can't fail because |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1079 |
* there's nothing to read when creating all new metaslabs. |
789 | 1080 |
*/ |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1081 |
VERIFY(vdev_metaslab_init(vd, txg) == 0); |
789 | 1082 |
} |
1083 |
||
1084 |
void |
|
1732 | 1085 |
vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) |
789 | 1086 |
{ |
1732 | 1087 |
ASSERT(vd == vd->vdev_top); |
1088 |
ASSERT(ISP2(flags)); |
|
789 | 1089 |
|
1732 | 1090 |
if (flags & VDD_METASLAB) |
1091 |
(void) txg_list_add(&vd->vdev_ms_list, arg, txg); |
|
1092 |
||
1093 |
if (flags & VDD_DTL) |
|
1094 |
(void) txg_list_add(&vd->vdev_dtl_list, arg, txg); |
|
1095 |
||
1096 |
(void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg); |
|
789 | 1097 |
} |
1098 |
||
1099 |
void |
|
1100 |
vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size) |
|
1101 |
{ |
|
1102 |
mutex_enter(sm->sm_lock); |
|
1103 |
if (!space_map_contains(sm, txg, size)) |
|
1104 |
space_map_add(sm, txg, size); |
|
1105 |
mutex_exit(sm->sm_lock); |
|
1106 |
} |
|
1107 |
||
1108 |
int |
|
1109 |
vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size) |
|
1110 |
{ |
|
1111 |
int dirty; |
|
1112 |
||
1113 |
/* |
|
1114 |
* Quick test without the lock -- covers the common case that |
|
1115 |
* there are no dirty time segments. |
|
1116 |
*/ |
|
1117 |
if (sm->sm_space == 0) |
|
1118 |
return (0); |
|
1119 |
||
1120 |
mutex_enter(sm->sm_lock); |
|
1121 |
dirty = space_map_contains(sm, txg, size); |
|
1122 |
mutex_exit(sm->sm_lock); |
|
1123 |
||
1124 |
return (dirty); |
|
1125 |
} |
|
1126 |
||
1127 |
/* |
|
1128 |
* Reassess DTLs after a config change or scrub completion. |
|
1129 |
*/ |
|
1130 |
void |
|
1131 |
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) |
|
1132 |
{ |
|
1544 | 1133 |
spa_t *spa = vd->vdev_spa; |
789 | 1134 |
int c; |
1135 |
||
1544 | 1136 |
ASSERT(spa_config_held(spa, RW_WRITER)); |
789 | 1137 |
|
1138 |
if (vd->vdev_children == 0) { |
|
1139 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1140 |
/* |
|
1141 |
* We're successfully scrubbed everything up to scrub_txg. |
|
1142 |
* Therefore, excise all old DTLs up to that point, then |
|
1143 |
* fold in the DTLs for everything we couldn't scrub. |
|
1144 |
*/ |
|
1145 |
if (scrub_txg != 0) { |
|
1146 |
space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg); |
|
1147 |
space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub); |
|
1148 |
} |
|
1149 |
if (scrub_done) |
|
1150 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
1151 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1732 | 1152 |
if (txg != 0) |
1153 |
vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); |
|
789 | 1154 |
return; |
1155 |
} |
|
1156 |
||
1544 | 1157 |
/* |
1158 |
* Make sure the DTLs are always correct under the scrub lock. |
|
1159 |
*/ |
|
1160 |
if (vd == spa->spa_root_vdev) |
|
1161 |
mutex_enter(&spa->spa_scrub_lock); |
|
1162 |
||
789 | 1163 |
mutex_enter(&vd->vdev_dtl_lock); |
1164 |
space_map_vacate(&vd->vdev_dtl_map, NULL, NULL); |
|
1165 |
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL); |
|
1166 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1167 |
||
1168 |
for (c = 0; c < vd->vdev_children; c++) { |
|
1169 |
vdev_t *cvd = vd->vdev_child[c]; |
|
1170 |
vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done); |
|
1171 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1172 |
space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map); |
|
1173 |
space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub); |
|
1174 |
mutex_exit(&vd->vdev_dtl_lock); |
|
1175 |
} |
|
1544 | 1176 |
|
1177 |
if (vd == spa->spa_root_vdev) |
|
1178 |
mutex_exit(&spa->spa_scrub_lock); |
|
789 | 1179 |
} |
1180 |
||
1181 |
static int |
|
1182 |
vdev_dtl_load(vdev_t *vd) |
|
1183 |
{ |
|
1184 |
spa_t *spa = vd->vdev_spa; |
|
1185 |
space_map_obj_t *smo = &vd->vdev_dtl; |
|
1732 | 1186 |
objset_t *mos = spa->spa_meta_objset; |
789 | 1187 |
dmu_buf_t *db; |
1188 |
int error; |
|
1189 |
||
1190 |
ASSERT(vd->vdev_children == 0); |
|
1191 |
||
1192 |
if (smo->smo_object == 0) |
|
1193 |
return (0); |
|
1194 |
||
1732 | 1195 |
if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0) |
1544 | 1196 |
return (error); |
1732 | 1197 |
|
789 | 1198 |
ASSERT3U(db->db_size, ==, sizeof (*smo)); |
1199 |
bcopy(db->db_data, smo, db->db_size); |
|
1544 | 1200 |
dmu_buf_rele(db, FTAG); |
789 | 1201 |
|
1202 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1732 | 1203 |
error = space_map_load(&vd->vdev_dtl_map, NULL, SM_ALLOC, smo, mos); |
789 | 1204 |
mutex_exit(&vd->vdev_dtl_lock); |
1205 |
||
1206 |
return (error); |
|
1207 |
} |
|
1208 |
||
1209 |
void |
|
1210 |
vdev_dtl_sync(vdev_t *vd, uint64_t txg) |
|
1211 |
{ |
|
1212 |
spa_t *spa = vd->vdev_spa; |
|
1213 |
space_map_obj_t *smo = &vd->vdev_dtl; |
|
1214 |
space_map_t *sm = &vd->vdev_dtl_map; |
|
1732 | 1215 |
objset_t *mos = spa->spa_meta_objset; |
789 | 1216 |
space_map_t smsync; |
1217 |
kmutex_t smlock; |
|
1218 |
dmu_buf_t *db; |
|
1219 |
dmu_tx_t *tx; |
|
1220 |
||
1221 |
dprintf("%s in txg %llu pass %d\n", |
|
1222 |
vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); |
|
1223 |
||
1224 |
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); |
|
1225 |
||
1226 |
if (vd->vdev_detached) { |
|
1227 |
if (smo->smo_object != 0) { |
|
1732 | 1228 |
int err = dmu_object_free(mos, smo->smo_object, tx); |
789 | 1229 |
ASSERT3U(err, ==, 0); |
1230 |
smo->smo_object = 0; |
|
1231 |
} |
|
1232 |
dmu_tx_commit(tx); |
|
1732 | 1233 |
dprintf("detach %s committed in txg %llu\n", |
1234 |
vdev_description(vd), txg); |
|
789 | 1235 |
return; |
1236 |
} |
|
1237 |
||
1238 |
if (smo->smo_object == 0) { |
|
1239 |
ASSERT(smo->smo_objsize == 0); |
|
1240 |
ASSERT(smo->smo_alloc == 0); |
|
1732 | 1241 |
smo->smo_object = dmu_object_alloc(mos, |
789 | 1242 |
DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, |
1243 |
DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); |
|
1244 |
ASSERT(smo->smo_object != 0); |
|
1245 |
vdev_config_dirty(vd->vdev_top); |
|
1246 |
} |
|
1247 |
||
1248 |
mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL); |
|
1249 |
||
1250 |
space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift, |
|
1251 |
&smlock); |
|
1252 |
||
1253 |
mutex_enter(&smlock); |
|
1254 |
||
1255 |
mutex_enter(&vd->vdev_dtl_lock); |
|
1732 | 1256 |
space_map_walk(sm, space_map_add, &smsync); |
789 | 1257 |
mutex_exit(&vd->vdev_dtl_lock); |
1258 |
||
1732 | 1259 |
space_map_truncate(smo, mos, tx); |
1260 |
space_map_sync(&smsync, SM_ALLOC, smo, mos, tx); |
|
789 | 1261 |
|
1262 |
space_map_destroy(&smsync); |
|
1263 |
||
1264 |
mutex_exit(&smlock); |
|
1265 |
mutex_destroy(&smlock); |
|
1266 |
||
1732 | 1267 |
VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); |
789 | 1268 |
dmu_buf_will_dirty(db, tx); |
1269 |
ASSERT3U(db->db_size, ==, sizeof (*smo)); |
|
1270 |
bcopy(smo, db->db_data, db->db_size); |
|
1544 | 1271 |
dmu_buf_rele(db, FTAG); |
789 | 1272 |
|
1273 |
dmu_tx_commit(tx); |
|
1274 |
} |
|
1275 |
||
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1276 |
void |
1544 | 1277 |
vdev_load(vdev_t *vd) |
789 | 1278 |
{ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1279 |
int c; |
789 | 1280 |
|
1281 |
/* |
|
1282 |
* Recursively load all children. |
|
1283 |
*/ |
|
1284 |
for (c = 0; c < vd->vdev_children; c++) |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1285 |
vdev_load(vd->vdev_child[c]); |
789 | 1286 |
|
1287 |
/* |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1288 |
* If this is a top-level vdev, initialize its metaslabs. |
789 | 1289 |
*/ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1290 |
if (vd == vd->vdev_top && |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1291 |
(vd->vdev_ashift == 0 || vd->vdev_asize == 0 || |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1292 |
vdev_metaslab_init(vd, 0) != 0)) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1293 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1294 |
VDEV_AUX_CORRUPT_DATA); |
789 | 1295 |
|
1296 |
/* |
|
1297 |
* If this is a leaf vdev, load its DTL. |
|
1298 |
*/ |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1299 |
if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1300 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1301 |
VDEV_AUX_CORRUPT_DATA); |
789 | 1302 |
} |
1303 |
||
2082 | 1304 |
/* |
1305 |
* This special case of vdev_spare() is used for hot spares. It's sole purpose |
|
1306 |
* it to set the vdev state for the associated vdev. To do this, we make sure |
|
1307 |
* that we can open the underlying device, then try to read the label, and make |
|
1308 |
* sure that the label is sane and that it hasn't been repurposed to another |
|
1309 |
* pool. |
|
1310 |
*/ |
|
1311 |
int |
|
1312 |
vdev_validate_spare(vdev_t *vd) |
|
1313 |
{ |
|
1314 |
nvlist_t *label; |
|
1315 |
uint64_t guid, version; |
|
1316 |
uint64_t state; |
|
1317 |
||
1318 |
if ((label = vdev_label_read_config(vd)) == NULL) { |
|
1319 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
|
1320 |
VDEV_AUX_CORRUPT_DATA); |
|
1321 |
return (-1); |
|
1322 |
} |
|
1323 |
||
1324 |
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || |
|
1325 |
version > ZFS_VERSION || |
|
1326 |
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || |
|
1327 |
guid != vd->vdev_guid || |
|
1328 |
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { |
|
1329 |
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, |
|
1330 |
VDEV_AUX_CORRUPT_DATA); |
|
1331 |
nvlist_free(label); |
|
1332 |
return (-1); |
|
1333 |
} |
|
1334 |
||
1335 |
/* |
|
1336 |
* We don't actually check the pool state here. If it's in fact in |
|
1337 |
* use by another pool, we update this fact on the fly when requested. |
|
1338 |
*/ |
|
1339 |
nvlist_free(label); |
|
1340 |
return (0); |
|
1341 |
} |
|
1342 |
||
789 | 1343 |
void |
1344 |
vdev_sync_done(vdev_t *vd, uint64_t txg) |
|
1345 |
{ |
|
1346 |
metaslab_t *msp; |
|
1347 |
||
1348 |
dprintf("%s txg %llu\n", vdev_description(vd), txg); |
|
1349 |
||
1350 |
while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) |
|
1351 |
metaslab_sync_done(msp, txg); |
|
1352 |
} |
|
1353 |
||
1354 |
void |
|
1355 |
vdev_sync(vdev_t *vd, uint64_t txg) |
|
1356 |
{ |
|
1357 |
spa_t *spa = vd->vdev_spa; |
|
1358 |
vdev_t *lvd; |
|
1359 |
metaslab_t *msp; |
|
1732 | 1360 |
dmu_tx_t *tx; |
789 | 1361 |
|
1362 |
dprintf("%s txg %llu pass %d\n", |
|
1363 |
vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa)); |
|
1364 |
||
1732 | 1365 |
if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) { |
1366 |
ASSERT(vd == vd->vdev_top); |
|
1367 |
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); |
|
1368 |
vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, |
|
1369 |
DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); |
|
1370 |
ASSERT(vd->vdev_ms_array != 0); |
|
1371 |
vdev_config_dirty(vd); |
|
1372 |
dmu_tx_commit(tx); |
|
1373 |
} |
|
789 | 1374 |
|
1732 | 1375 |
while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) { |
789 | 1376 |
metaslab_sync(msp, txg); |
1732 | 1377 |
(void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg)); |
1378 |
} |
|
789 | 1379 |
|
1380 |
while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) |
|
1381 |
vdev_dtl_sync(lvd, txg); |
|
1382 |
||
1383 |
(void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); |
|
1384 |
} |
|
1385 |
||
1386 |
uint64_t |
|
1387 |
vdev_psize_to_asize(vdev_t *vd, uint64_t psize) |
|
1388 |
{ |
|
1389 |
return (vd->vdev_ops->vdev_op_asize(vd, psize)); |
|
1390 |
} |
|
1391 |
||
1392 |
void |
|
1393 |
vdev_io_start(zio_t *zio) |
|
1394 |
{ |
|
1395 |
zio->io_vd->vdev_ops->vdev_op_io_start(zio); |
|
1396 |
} |
|
1397 |
||
1398 |
void |
|
1399 |
vdev_io_done(zio_t *zio) |
|
1400 |
{ |
|
1401 |
zio->io_vd->vdev_ops->vdev_op_io_done(zio); |
|
1402 |
} |
|
1403 |
||
1404 |
const char * |
|
1405 |
vdev_description(vdev_t *vd) |
|
1406 |
{ |
|
1407 |
if (vd == NULL || vd->vdev_ops == NULL) |
|
1408 |
return ("<unknown>"); |
|
1409 |
||
1410 |
if (vd->vdev_path != NULL) |
|
1411 |
return (vd->vdev_path); |
|
1412 |
||
1413 |
if (vd->vdev_parent == NULL) |
|
1414 |
return (spa_name(vd->vdev_spa)); |
|
1415 |
||
1416 |
return (vd->vdev_ops->vdev_op_type); |
|
1417 |
} |
|
1418 |
||
1419 |
int |
|
1544 | 1420 |
vdev_online(spa_t *spa, uint64_t guid) |
789 | 1421 |
{ |
1485 | 1422 |
vdev_t *rvd, *vd; |
1423 |
uint64_t txg; |
|
789 | 1424 |
|
1485 | 1425 |
txg = spa_vdev_enter(spa); |
1426 |
||
1427 |
rvd = spa->spa_root_vdev; |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1428 |
|
1544 | 1429 |
if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) |
1485 | 1430 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
789 | 1431 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1432 |
if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1433 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1434 |
|
789 | 1435 |
dprintf("ONLINE: %s\n", vdev_description(vd)); |
1436 |
||
1437 |
vd->vdev_offline = B_FALSE; |
|
1485 | 1438 |
vd->vdev_tmpoffline = B_FALSE; |
1544 | 1439 |
vdev_reopen(vd->vdev_top); |
789 | 1440 |
|
1485 | 1441 |
vdev_config_dirty(vd->vdev_top); |
1442 |
||
1443 |
(void) spa_vdev_exit(spa, NULL, txg, 0); |
|
789 | 1444 |
|
1445 |
VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0); |
|
1446 |
||
1447 |
return (0); |
|
1448 |
} |
|
1449 |
||
1450 |
int |
|
1544 | 1451 |
vdev_offline(spa_t *spa, uint64_t guid, int istmp) |
789 | 1452 |
{ |
1485 | 1453 |
vdev_t *rvd, *vd; |
1454 |
uint64_t txg; |
|
789 | 1455 |
|
1485 | 1456 |
txg = spa_vdev_enter(spa); |
789 | 1457 |
|
1485 | 1458 |
rvd = spa->spa_root_vdev; |
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1459 |
|
1544 | 1460 |
if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) |
1485 | 1461 |
return (spa_vdev_exit(spa, NULL, txg, ENODEV)); |
789 | 1462 |
|
1585
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1463 |
if (!vd->vdev_ops->vdev_op_leaf) |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1464 |
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); |
4ad213e858a9
6395480 ztest ASSERT: rbt.bt_objset == wbt.bt_objset, line 2041
bonwick
parents:
1544
diff
changeset
|
1465 |
|
789 | 1466 |
dprintf("OFFLINE: %s\n", vdev_description(vd)); |
1467 |
||
1468 |
/* |
|
1732 | 1469 |
* If the device isn't already offline, try to offline it. |
789 | 1470 |
*/ |
1732 | 1471 |
if (!vd->vdev_offline) { |
1472 |
/* |
|
1473 |
* If this device's top-level vdev has a non-empty DTL, |
|
1474 |
* don't allow the device to be offlined. |
|
1475 |
* |
|
1476 |
* XXX -- make this more precise by allowing the offline |
|
1477 |
* as long as the remaining devices don't have any DTL holes. |
|
1478 |
*/ |
|
1479 |
if (vd->vdev_top->vdev_dtl_map.sm_space != 0) |
|
1480 |
return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
|
789 | 1481 |
|
1732 | 1482 |
/* |
1483 |
* Offline this device and reopen its top-level vdev. |
|
1484 |
* If this action results in the top-level vdev becoming |
|
1485 |
* unusable, undo it and fail the request. |
|
1486 |
*/ |
|
1487 |
vd->vdev_offline = B_TRUE; |
|
1544 | 1488 |
vdev_reopen(vd->vdev_top); |
1732 | 1489 |
if (vdev_is_dead(vd->vdev_top)) { |
1490 |
vd->vdev_offline = B_FALSE; |
|
1491 |
vdev_reopen(vd->vdev_top); |
|
1492 |
return (spa_vdev_exit(spa, NULL, txg, EBUSY)); |
|
1493 |
} |
|
789 | 1494 |
} |
1495 |
||
1485 | 1496 |
vd->vdev_tmpoffline = istmp; |
1732 | 1497 |
|
1498 |
vdev_config_dirty(vd->vdev_top); |
|
1485 | 1499 |
|
1500 |
return (spa_vdev_exit(spa, NULL, txg, 0)); |
|
789 | 1501 |
} |
1502 |
||
1544 | 1503 |
/* |
1504 |
* Clear the error counts associated with this vdev. Unlike vdev_online() and |
|
1505 |
* vdev_offline(), we assume the spa config is locked. We also clear all |
|
1506 |
* children. If 'vd' is NULL, then the user wants to clear all vdevs. |
|
1507 |
*/ |
|
1508 |
void |
|
1509 |
vdev_clear(spa_t *spa, vdev_t *vd) |
|
789 | 1510 |
{ |
1544 | 1511 |
int c; |
789 | 1512 |
|
1544 | 1513 |
if (vd == NULL) |
1514 |
vd = spa->spa_root_vdev; |
|
789 | 1515 |
|
1544 | 1516 |
vd->vdev_stat.vs_read_errors = 0; |
1517 |
vd->vdev_stat.vs_write_errors = 0; |
|
1518 |
vd->vdev_stat.vs_checksum_errors = 0; |
|
789 | 1519 |
|
1544 | 1520 |
for (c = 0; c < vd->vdev_children; c++) |
1521 |
vdev_clear(spa, vd->vdev_child[c]); |
|
789 | 1522 |
} |
1523 |
||
1524 |
int |
|
1525 |
vdev_is_dead(vdev_t *vd) |
|
1526 |
{ |
|
1527 |
return (vd->vdev_state <= VDEV_STATE_CANT_OPEN); |
|
1528 |
} |
|
1529 |
||
1530 |
int |
|
1531 |
vdev_error_inject(vdev_t *vd, zio_t *zio) |
|
1532 |
{ |
|
1533 |
int error = 0; |
|
1534 |
||
1535 |
if (vd->vdev_fault_mode == VDEV_FAULT_NONE) |
|
1536 |
return (0); |
|
1537 |
||
1538 |
if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0) |
|
1539 |
return (0); |
|
1540 |
||
1541 |
switch (vd->vdev_fault_mode) { |
|
1542 |
case VDEV_FAULT_RANDOM: |
|
1543 |
if (spa_get_random(vd->vdev_fault_arg) == 0) |
|
1544 |
error = EIO; |
|
1545 |
break; |
|
1546 |
||
1547 |
case VDEV_FAULT_COUNT: |
|
1548 |
if ((int64_t)--vd->vdev_fault_arg <= 0) |
|
1549 |
vd->vdev_fault_mode = VDEV_FAULT_NONE; |
|
1550 |
error = EIO; |
|
1551 |
break; |
|
1552 |
} |
|
1553 |
||
1554 |
if (error != 0) { |
|
1555 |
dprintf("returning %d for type %d on %s state %d offset %llx\n", |
|
1556 |
error, zio->io_type, vdev_description(vd), |
|
1557 |
vd->vdev_state, zio->io_offset); |
|
1558 |
} |
|
1559 |
||
1560 |
return (error); |
|
1561 |
} |
|
1562 |
||
1563 |
/* |
|
1564 |
* Get statistics for the given vdev. |
|
1565 |
*/ |
|
1566 |
void |
|
1567 |
vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) |
|
1568 |
{ |
|
1569 |
vdev_t *rvd = vd->vdev_spa->spa_root_vdev; |
|
1570 |
int c, t; |
|
1571 |
||
1572 |
mutex_enter(&vd->vdev_stat_lock); |
|
1573 |
bcopy(&vd->vdev_stat, vs, sizeof (*vs)); |
|
1574 |
vs->vs_timestamp = gethrtime() - vs->vs_timestamp; |
|
1575 |
vs->vs_state = vd->vdev_state; |
|
1175
759d20c7e57b
6366265 attach/replace should allow a new device size at least the min of all devs in a mirror/raidz
lling
parents:
1171
diff
changeset
|
1576 |
vs->vs_rsize = vdev_get_rsize(vd); |
789 | 1577 |
mutex_exit(&vd->vdev_stat_lock); |
1578 |
||
1579 |
/* |
|
1580 |
* If we're getting stats on the root vdev, aggregate the I/O counts |
|
1581 |
* over all top-level vdevs (i.e. the direct children of the root). |
|
1582 |
*/ |
|
1583 |
if (vd == rvd) { |
|
1584 |
for (c = 0; c < rvd->vdev_children; c++) { |
|
1585 |
vdev_t *cvd = rvd->vdev_child[c]; |
|
1586 |
vdev_stat_t *cvs = &cvd->vdev_stat; |
|
1587 |
||
1588 |
mutex_enter(&vd->vdev_stat_lock); |
|
1589 |
for (t = 0; t < ZIO_TYPES; t++) { |
|
1590 |
vs->vs_ops[t] += cvs->vs_ops[t]; |
|
1591 |
vs->vs_bytes[t] += cvs->vs_bytes[t]; |
|
1592 |
} |
|
1593 |
vs->vs_read_errors += cvs->vs_read_errors; |
|
1594 |
vs->vs_write_errors += cvs->vs_write_errors; |
|
1595 |
vs->vs_checksum_errors += cvs->vs_checksum_errors; |
|
1596 |
vs->vs_scrub_examined += cvs->vs_scrub_examined; |
|
1597 |
vs->vs_scrub_errors += cvs->vs_scrub_errors; |
|
1598 |
mutex_exit(&vd->vdev_stat_lock); |
|
1599 |
} |
|
1600 |
} |
|
1601 |
} |
|
1602 |
||
1603 |
void |
|
1604 |
vdev_stat_update(zio_t *zio) |
|
1605 |
{ |
|
1606 |
vdev_t *vd = zio->io_vd; |
|
1607 |
vdev_t *pvd; |
|
1608 |
uint64_t txg = zio->io_txg; |
|
1609 |
vdev_stat_t *vs = &vd->vdev_stat; |
|
1610 |
zio_type_t type = zio->io_type; |
|
1611 |
int flags = zio->io_flags; |
|
1612 |
||
1613 |
if (zio->io_error == 0) { |
|
1614 |
if (!(flags & ZIO_FLAG_IO_BYPASS)) { |
|
1615 |
mutex_enter(&vd->vdev_stat_lock); |
|
1616 |
vs->vs_ops[type]++; |
|
1617 |
vs->vs_bytes[type] += zio->io_size; |
|
1618 |
mutex_exit(&vd->vdev_stat_lock); |
|
1619 |
} |
|
1620 |
if ((flags & ZIO_FLAG_IO_REPAIR) && |
|
1621 |
zio->io_delegate_list == NULL) { |
|
1622 |
mutex_enter(&vd->vdev_stat_lock); |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1623 |
if (flags & ZIO_FLAG_SCRUB_THREAD) |
789 | 1624 |
vs->vs_scrub_repaired += zio->io_size; |
1625 |
else |
|
1626 |
vs->vs_self_healed += zio->io_size; |
|
1627 |
mutex_exit(&vd->vdev_stat_lock); |
|
1628 |
} |
|
1629 |
return; |
|
1630 |
} |
|
1631 |
||
1632 |
if (flags & ZIO_FLAG_SPECULATIVE) |
|
1633 |
return; |
|
1634 |
||
1635 |
if (!vdev_is_dead(vd)) { |
|
1636 |
mutex_enter(&vd->vdev_stat_lock); |
|
1637 |
if (type == ZIO_TYPE_READ) { |
|
1638 |
if (zio->io_error == ECKSUM) |
|
1639 |
vs->vs_checksum_errors++; |
|
1640 |
else |
|
1641 |
vs->vs_read_errors++; |
|
1642 |
} |
|
1643 |
if (type == ZIO_TYPE_WRITE) |
|
1644 |
vs->vs_write_errors++; |
|
1645 |
mutex_exit(&vd->vdev_stat_lock); |
|
1646 |
} |
|
1647 |
||
1648 |
if (type == ZIO_TYPE_WRITE) { |
|
1649 |
if (txg == 0 || vd->vdev_children != 0) |
|
1650 |
return; |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1651 |
if (flags & ZIO_FLAG_SCRUB_THREAD) { |
789 | 1652 |
ASSERT(flags & ZIO_FLAG_IO_REPAIR); |
1653 |
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) |
|
1654 |
vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1); |
|
1655 |
} |
|
1656 |
if (!(flags & ZIO_FLAG_IO_REPAIR)) { |
|
1657 |
if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1)) |
|
1658 |
return; |
|
1732 | 1659 |
vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); |
789 | 1660 |
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) |
1661 |
vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1); |
|
1662 |
} |
|
1663 |
} |
|
1664 |
} |
|
1665 |
||
1666 |
void |
|
1667 |
vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete) |
|
1668 |
{ |
|
1669 |
int c; |
|
1670 |
vdev_stat_t *vs = &vd->vdev_stat; |
|
1671 |
||
1672 |
for (c = 0; c < vd->vdev_children; c++) |
|
1673 |
vdev_scrub_stat_update(vd->vdev_child[c], type, complete); |
|
1674 |
||
1675 |
mutex_enter(&vd->vdev_stat_lock); |
|
1676 |
||
1677 |
if (type == POOL_SCRUB_NONE) { |
|
1678 |
/* |
|
1679 |
* Update completion and end time. Leave everything else alone |
|
1680 |
* so we can report what happened during the previous scrub. |
|
1681 |
*/ |
|
1682 |
vs->vs_scrub_complete = complete; |
|
1683 |
vs->vs_scrub_end = gethrestime_sec(); |
|
1684 |
} else { |
|
1685 |
vs->vs_scrub_type = type; |
|
1686 |
vs->vs_scrub_complete = 0; |
|
1687 |
vs->vs_scrub_examined = 0; |
|
1688 |
vs->vs_scrub_repaired = 0; |
|
1689 |
vs->vs_scrub_errors = 0; |
|
1690 |
vs->vs_scrub_start = gethrestime_sec(); |
|
1691 |
vs->vs_scrub_end = 0; |
|
1692 |
} |
|
1693 |
||
1694 |
mutex_exit(&vd->vdev_stat_lock); |
|
1695 |
} |
|
1696 |
||
1697 |
/* |
|
1698 |
* Update the in-core space usage stats for this vdev and the root vdev. |
|
1699 |
*/ |
|
1700 |
void |
|
2082 | 1701 |
vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta) |
789 | 1702 |
{ |
1703 |
ASSERT(vd == vd->vdev_top); |
|
2082 | 1704 |
int64_t dspace_delta = space_delta; |
789 | 1705 |
|
1706 |
do { |
|
2082 | 1707 |
if (vd->vdev_ms_count) { |
1708 |
/* |
|
1709 |
* If this is a top-level vdev, apply the |
|
1710 |
* inverse of its psize-to-asize (ie. RAID-Z) |
|
1711 |
* space-expansion factor. We must calculate |
|
1712 |
* this here and not at the root vdev because |
|
1713 |
* the root vdev's psize-to-asize is simply the |
|
1714 |
* max of its childrens', thus not accurate |
|
1715 |
* enough for us. |
|
1716 |
*/ |
|
1717 |
ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); |
|
1718 |
dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * |
|
1719 |
vd->vdev_deflate_ratio; |
|
1720 |
} |
|
1721 |
||
789 | 1722 |
mutex_enter(&vd->vdev_stat_lock); |
1723 |
vd->vdev_stat.vs_space += space_delta; |
|
1724 |
vd->vdev_stat.vs_alloc += alloc_delta; |
|
2082 | 1725 |
vd->vdev_stat.vs_dspace += dspace_delta; |
789 | 1726 |
mutex_exit(&vd->vdev_stat_lock); |
1727 |
} while ((vd = vd->vdev_parent) != NULL); |
|
1728 |
} |
|
1729 |
||
1730 |
/* |
|
1731 |
* Various knobs to tune a vdev. |
|
1732 |
*/ |
|
1733 |
static vdev_knob_t vdev_knob[] = { |
|
1734 |
{ |
|
1735 |
"cache_size", |
|
1736 |
"size of the read-ahead cache", |
|
1737 |
0, |
|
1738 |
1ULL << 30, |
|
1739 |
10ULL << 20, |
|
1740 |
offsetof(struct vdev, vdev_cache.vc_size) |
|
1741 |
}, |
|
1742 |
{ |
|
1743 |
"cache_bshift", |
|
1744 |
"log2 of cache blocksize", |
|
1745 |
SPA_MINBLOCKSHIFT, |
|
1746 |
SPA_MAXBLOCKSHIFT, |
|
1747 |
16, |
|
1748 |
offsetof(struct vdev, vdev_cache.vc_bshift) |
|
1749 |
}, |
|
1750 |
{ |
|
1751 |
"cache_max", |
|
1752 |
"largest block size to cache", |
|
1753 |
0, |
|
1754 |
SPA_MAXBLOCKSIZE, |
|
1755 |
1ULL << 14, |
|
1756 |
offsetof(struct vdev, vdev_cache.vc_max) |
|
1757 |
}, |
|
1758 |
{ |
|
1759 |
"min_pending", |
|
1760 |
"minimum pending I/Os to the disk", |
|
1761 |
1, |
|
1762 |
10000, |
|
1763 |
2, |
|
1764 |
offsetof(struct vdev, vdev_queue.vq_min_pending) |
|
1765 |
}, |
|
1766 |
{ |
|
1767 |
"max_pending", |
|
1768 |
"maximum pending I/Os to the disk", |
|
1769 |
1, |
|
1770 |
10000, |
|
1771 |
35, |
|
1772 |
offsetof(struct vdev, vdev_queue.vq_max_pending) |
|
1773 |
}, |
|
1774 |
{ |
|
1544 | 1775 |
"scrub_limit", |
1776 |
"maximum scrub/resilver I/O queue", |
|
1777 |
0, |
|
1778 |
10000, |
|
1779 |
70, |
|
1780 |
offsetof(struct vdev, vdev_queue.vq_scrub_limit) |
|
1781 |
}, |
|
1782 |
{ |
|
789 | 1783 |
"agg_limit", |
1784 |
"maximum size of aggregated I/Os", |
|
1785 |
0, |
|
1786 |
SPA_MAXBLOCKSIZE, |
|
1787 |
SPA_MAXBLOCKSIZE, |
|
1788 |
offsetof(struct vdev, vdev_queue.vq_agg_limit) |
|
1789 |
}, |
|
1790 |
{ |
|
1791 |
"time_shift", |
|
1792 |
"deadline = pri + (lbolt >> time_shift)", |
|
1793 |
0, |
|
1794 |
63, |
|
1795 |
4, |
|
1796 |
offsetof(struct vdev, vdev_queue.vq_time_shift) |
|
1797 |
}, |
|
1798 |
{ |
|
1799 |
"ramp_rate", |
|
1800 |
"exponential I/O issue ramp-up rate", |
|
1801 |
1, |
|
1802 |
10000, |
|
1803 |
2, |
|
1804 |
offsetof(struct vdev, vdev_queue.vq_ramp_rate) |
|
1805 |
}, |
|
1806 |
}; |
|
1807 |
||
1808 |
/*
 * Iterate over the vdev_knob table.  Pass NULL to get the first knob, or a
 * previously returned knob to get the one after it.  Returns NULL once the
 * end of the table has been reached.
 */
vdev_knob_t *
vdev_knob_next(vdev_knob_t *vk)
{
	vdev_knob_t *end = vdev_knob +
	    sizeof (vdev_knob) / sizeof (vdev_knob[0]);
	vdev_knob_t *next;

	if (vk == NULL)
		return (vdev_knob);

	next = vk + 1;

	return (next == end ? NULL : next);
}
|
1819 |
||
1820 |
/* |
|
1821 |
* Mark a top-level vdev's config as dirty, placing it on the dirty list |
|
1822 |
* so that it will be written out next time the vdev configuration is synced. |
|
1823 |
* If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. |
|
1824 |
*/ |
|
1825 |
void |
|
1826 |
vdev_config_dirty(vdev_t *vd) |
|
1827 |
{ |
|
1828 |
spa_t *spa = vd->vdev_spa; |
|
1829 |
vdev_t *rvd = spa->spa_root_vdev; |
|
1830 |
int c; |
|
1831 |
||
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1832 |
/* |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1833 |
* The dirty list is protected by the config lock. The caller must |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1834 |
* either hold the config lock as writer, or must be the sync thread |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1835 |
* (which holds the lock as reader). There's only one sync thread, |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1836 |
* so this is sufficient to ensure mutual exclusion. |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1837 |
*/ |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1838 |
ASSERT(spa_config_held(spa, RW_WRITER) || |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1839 |
dsl_pool_sync_context(spa_get_dsl(spa))); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1840 |
|
789 | 1841 |
if (vd == rvd) { |
1842 |
for (c = 0; c < rvd->vdev_children; c++) |
|
1843 |
vdev_config_dirty(rvd->vdev_child[c]); |
|
1844 |
} else { |
|
1845 |
ASSERT(vd == vd->vdev_top); |
|
1846 |
||
1732 | 1847 |
if (!list_link_active(&vd->vdev_dirty_node)) |
789 | 1848 |
list_insert_head(&spa->spa_dirty_list, vd); |
1849 |
} |
|
1850 |
} |
|
1851 |
||
1852 |
void |
|
1853 |
vdev_config_clean(vdev_t *vd) |
|
1854 |
{ |
|
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1855 |
spa_t *spa = vd->vdev_spa; |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1856 |
|
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1857 |
ASSERT(spa_config_held(spa, RW_WRITER) || |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1858 |
dsl_pool_sync_context(spa_get_dsl(spa))); |
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1859 |
|
1732 | 1860 |
ASSERT(list_link_active(&vd->vdev_dirty_node)); |
1601
438b928f80c7
6397197 ADVANCE_ZIL should only visit claimed-but-not-yet-replayed logs
bonwick
parents:
1585
diff
changeset
|
1861 |
list_remove(&spa->spa_dirty_list, vd); |
789 | 1862 |
} |
1863 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1864 |
void |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1865 |
vdev_propagate_state(vdev_t *vd) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1866 |
{ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1867 |
vdev_t *rvd = vd->vdev_spa->spa_root_vdev; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1868 |
int degraded = 0, faulted = 0; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1869 |
int corrupted = 0; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1870 |
int c; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1871 |
vdev_t *child; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1872 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1873 |
for (c = 0; c < vd->vdev_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1874 |
child = vd->vdev_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1875 |
if (child->vdev_state <= VDEV_STATE_CANT_OPEN) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1876 |
faulted++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1877 |
else if (child->vdev_state == VDEV_STATE_DEGRADED) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1878 |
degraded++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1879 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1880 |
if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1881 |
corrupted++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1882 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1883 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1884 |
vd->vdev_ops->vdev_op_state_change(vd, faulted, degraded); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1885 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1886 |
/* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1887 |
* Root special: if there is a toplevel vdev that cannot be |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1888 |
* opened due to corrupted metadata, then propagate the root |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1889 |
* vdev's aux state as 'corrupt' rather than 'insufficient |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1890 |
* replicas'. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1891 |
*/ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1892 |
if (corrupted && vd == rvd && rvd->vdev_state == VDEV_STATE_CANT_OPEN) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1893 |
vdev_set_state(rvd, B_FALSE, VDEV_STATE_CANT_OPEN, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1894 |
VDEV_AUX_CORRUPT_DATA); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1895 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1896 |
|
789 | 1897 |
/* |
1544 | 1898 |
* Set a vdev's state. If this is during an open, we don't update the parent |
1899 |
* state, because we're in the process of opening children depth-first. |
|
1900 |
* Otherwise, we propagate the change to the parent. |
|
1901 |
* |
|
1902 |
* If this routine places a device in a faulted state, an appropriate ereport is |
|
1903 |
* generated. |
|
789 | 1904 |
*/ |
1905 |
void |
|
1544 | 1906 |
vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) |
789 | 1907 |
{ |
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1908 |
uint64_t save_state; |
1544 | 1909 |
|
1910 |
if (state == vd->vdev_state) { |
|
1911 |
vd->vdev_stat.vs_aux = aux; |
|
789 | 1912 |
return; |
1544 | 1913 |
} |
1914 |
||
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1915 |
save_state = vd->vdev_state; |
789 | 1916 |
|
1917 |
vd->vdev_state = state; |
|
1918 |
vd->vdev_stat.vs_aux = aux; |
|
1919 |
||
1544 | 1920 |
if (state == VDEV_STATE_CANT_OPEN) { |
1921 |
/* |
|
1922 |
* If we fail to open a vdev during an import, we mark it as |
|
1923 |
* "not available", which signifies that it was never there to |
|
1924 |
* begin with. Failure to open such a device is not considered |
|
1925 |
* an error. |
|
1926 |
*/ |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1927 |
if (vd->vdev_spa->spa_load_state == SPA_LOAD_IMPORT && |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1928 |
vd->vdev_ops->vdev_op_leaf) |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1929 |
vd->vdev_not_present = 1; |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1930 |
|
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1931 |
/* |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1932 |
* Post the appropriate ereport. If the 'prevstate' field is |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1933 |
* set to something other than VDEV_STATE_UNKNOWN, it indicates |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1934 |
* that this is part of a vdev_reopen(). In this case, we don't |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1935 |
* want to post the ereport if the device was already in the |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1936 |
* CANT_OPEN state beforehand. |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1937 |
*/ |
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1938 |
if (vd->vdev_prevstate != state && !vd->vdev_not_present && |
1544 | 1939 |
vd != vd->vdev_spa->spa_root_vdev) { |
1940 |
const char *class; |
|
1941 |
||
1942 |
switch (aux) { |
|
1943 |
case VDEV_AUX_OPEN_FAILED: |
|
1944 |
class = FM_EREPORT_ZFS_DEVICE_OPEN_FAILED; |
|
1945 |
break; |
|
1946 |
case VDEV_AUX_CORRUPT_DATA: |
|
1947 |
class = FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA; |
|
1948 |
break; |
|
1949 |
case VDEV_AUX_NO_REPLICAS: |
|
1950 |
class = FM_EREPORT_ZFS_DEVICE_NO_REPLICAS; |
|
1951 |
break; |
|
1952 |
case VDEV_AUX_BAD_GUID_SUM: |
|
1953 |
class = FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM; |
|
1954 |
break; |
|
1955 |
case VDEV_AUX_TOO_SMALL: |
|
1956 |
class = FM_EREPORT_ZFS_DEVICE_TOO_SMALL; |
|
1957 |
break; |
|
1958 |
case VDEV_AUX_BAD_LABEL: |
|
1959 |
class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL; |
|
1960 |
break; |
|
1961 |
default: |
|
1962 |
class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; |
|
1963 |
} |
|
1964 |
||
1965 |
zfs_ereport_post(class, vd->vdev_spa, |
|
1986
628267397204
6424405 zpool import destroyed_pool can damage existing pool using same devices
eschrock
parents:
1807
diff
changeset
|
1966 |
vd, NULL, save_state, 0); |
1544 | 1967 |
} |
1968 |
} |
|
1969 |
||
1970 |
if (isopen) |
|
1971 |
return; |
|
1972 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1973 |
if (vd->vdev_parent != NULL) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1974 |
vdev_propagate_state(vd->vdev_parent); |
789 | 1975 |
} |