author | ck153898 |
Mon, 29 Oct 2007 22:45:33 -0700 | |
changeset 5378 | 111aa1baa84a |
parent 5329 | 33cb98223b2d |
child 5530 | 4ed96167d864 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
5329 | 22 |
* Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/zfs_context.h> |
|
29 |
#include <sys/spa.h> |
|
30 |
#include <sys/vdev_impl.h> |
|
31 |
#include <sys/zio.h> |
|
32 |
#include <sys/fs/zfs.h> |
|
33 |
||
34 |
/* |
|
35 |
* Virtual device vector for mirroring. |
|
36 |
*/ |
|
37 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
38 |
typedef struct mirror_child { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
39 |
vdev_t *mc_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
40 |
uint64_t mc_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
41 |
int mc_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
42 |
short mc_tried; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
43 |
short mc_skipped; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
44 |
} mirror_child_t; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
45 |
|
789 | 46 |
typedef struct mirror_map { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
47 |
int mm_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
48 |
int mm_replacing; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
49 |
int mm_preferred; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
50 |
int mm_root; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
51 |
mirror_child_t mm_child[1]; |
789 | 52 |
} mirror_map_t; |
53 |
||
2391 | 54 |
/*
 * For a plain mirror the preferred child of an I/O is
 * (io_offset >> vdev_mirror_shift) % children.
 */
int vdev_mirror_shift = 21;
55 |
||
789 | 56 |
static mirror_map_t * |
57 |
vdev_mirror_map_alloc(zio_t *zio) |
|
58 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
59 |
mirror_map_t *mm = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
60 |
mirror_child_t *mc; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
61 |
vdev_t *vd = zio->io_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
62 |
int c, d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
63 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
64 |
if (vd == NULL) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
65 |
dva_t *dva = zio->io_bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
66 |
spa_t *spa = zio->io_spa; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
67 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
68 |
c = BP_GET_NDVAS(zio->io_bp); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
69 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
70 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
71 |
mm->mm_children = c; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
72 |
mm->mm_replacing = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
73 |
mm->mm_preferred = spa_get_random(c); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
74 |
mm->mm_root = B_TRUE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
75 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
76 |
/* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
77 |
* Check the other, lower-index DVAs to see if they're on |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
78 |
* the same vdev as the child we picked. If they are, use |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
79 |
* them since they are likely to have been allocated from |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
80 |
* the primary metaslab in use at the time, and hence are |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
81 |
* more likely to have locality with single-copy data. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
82 |
*/ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
83 |
for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
84 |
if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c])) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
85 |
mm->mm_preferred = d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
86 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
87 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
88 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
89 |
mc = &mm->mm_child[c]; |
2082 | 90 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
91 |
mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
92 |
mc->mc_offset = DVA_GET_OFFSET(&dva[c]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
93 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
94 |
} else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
95 |
c = vd->vdev_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
96 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
97 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
98 |
mm->mm_children = c; |
2082 | 99 |
mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || |
100 |
vd->vdev_ops == &vdev_spare_ops); |
|
2391 | 101 |
mm->mm_preferred = mm->mm_replacing ? 0 : |
102 |
(zio->io_offset >> vdev_mirror_shift) % c; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
103 |
mm->mm_root = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
104 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
105 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
106 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
107 |
mc->mc_vd = vd->vdev_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
108 |
mc->mc_offset = zio->io_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
109 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
110 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
111 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
112 |
zio->io_vsd = mm; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
113 |
return (mm); |
789 | 114 |
} |
115 |
||
116 |
static void |
|
117 |
vdev_mirror_map_free(zio_t *zio) |
|
118 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
119 |
mirror_map_t *mm = zio->io_vsd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
120 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
121 |
kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children])); |
789 | 122 |
zio->io_vsd = NULL; |
123 |
} |
|
124 |
||
125 |
static int |
|
126 |
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) |
|
127 |
{ |
|
128 |
vdev_t *cvd; |
|
129 |
uint64_t c; |
|
130 |
int numerrors = 0; |
|
131 |
int ret, lasterror = 0; |
|
132 |
||
133 |
if (vd->vdev_children == 0) { |
|
134 |
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; |
|
135 |
return (EINVAL); |
|
136 |
} |
|
137 |
||
138 |
for (c = 0; c < vd->vdev_children; c++) { |
|
139 |
cvd = vd->vdev_child[c]; |
|
140 |
||
141 |
if ((ret = vdev_open(cvd)) != 0) { |
|
142 |
lasterror = ret; |
|
143 |
numerrors++; |
|
144 |
continue; |
|
145 |
} |
|
146 |
||
147 |
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; |
|
1732 | 148 |
*ashift = MAX(*ashift, cvd->vdev_ashift); |
789 | 149 |
} |
150 |
||
151 |
if (numerrors == vd->vdev_children) { |
|
152 |
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; |
|
153 |
return (lasterror); |
|
154 |
} |
|
155 |
||
156 |
return (0); |
|
157 |
} |
|
158 |
||
159 |
static void |
|
160 |
vdev_mirror_close(vdev_t *vd) |
|
161 |
{ |
|
162 |
uint64_t c; |
|
163 |
||
164 |
for (c = 0; c < vd->vdev_children; c++) |
|
165 |
vdev_close(vd->vdev_child[c]); |
|
166 |
} |
|
167 |
||
168 |
static void |
|
169 |
vdev_mirror_child_done(zio_t *zio) |
|
170 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
171 |
mirror_child_t *mc = zio->io_private; |
789 | 172 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
173 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
174 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
175 |
mc->mc_skipped = 0; |
789 | 176 |
} |
177 |
||
178 |
static void |
|
179 |
vdev_mirror_scrub_done(zio_t *zio) |
|
180 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
181 |
mirror_child_t *mc = zio->io_private; |
789 | 182 |
|
183 |
if (zio->io_error == 0) { |
|
184 |
zio_t *pio = zio->io_parent; |
|
185 |
mutex_enter(&pio->io_lock); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
186 |
ASSERT3U(zio->io_size, >=, pio->io_size); |
789 | 187 |
bcopy(zio->io_data, pio->io_data, pio->io_size); |
188 |
mutex_exit(&pio->io_lock); |
|
189 |
} |
|
190 |
||
191 |
zio_buf_free(zio->io_data, zio->io_size); |
|
192 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
193 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
194 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
195 |
mc->mc_skipped = 0; |
789 | 196 |
} |
197 |
||
1732 | 198 |
static void |
199 |
vdev_mirror_repair_done(zio_t *zio) |
|
200 |
{ |
|
201 |
ASSERT(zio->io_private == zio->io_parent); |
|
202 |
vdev_mirror_map_free(zio->io_private); |
|
203 |
} |
|
204 |
||
789 | 205 |
/* |
206 |
* Try to find a child whose DTL doesn't contain the block we want to read. |
|
207 |
* If we can't, try the read on any vdev we haven't already tried. |
|
208 |
*/ |
|
209 |
static int |
|
210 |
vdev_mirror_child_select(zio_t *zio) |
|
211 |
{ |
|
212 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
213 |
mirror_child_t *mc; |
789 | 214 |
uint64_t txg = zio->io_txg; |
215 |
int i, c; |
|
216 |
||
217 |
ASSERT(zio->io_bp == NULL || zio->io_bp->blk_birth == txg); |
|
218 |
||
219 |
/* |
|
220 |
* Try to find a child whose DTL doesn't contain the block to read. |
|
221 |
* If a child is known to be completely inaccessible (indicated by |
|
5329 | 222 |
* vdev_readable() returning B_FALSE), don't even try. |
789 | 223 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
224 |
for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
225 |
if (c >= mm->mm_children) |
789 | 226 |
c = 0; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
227 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
228 |
if (mc->mc_tried || mc->mc_skipped) |
789 | 229 |
continue; |
5329 | 230 |
if (vdev_is_dead(mc->mc_vd) && !vdev_readable(mc->mc_vd)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
231 |
mc->mc_error = ENXIO; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
232 |
mc->mc_tried = 1; /* don't even try */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
233 |
mc->mc_skipped = 1; |
789 | 234 |
continue; |
235 |
} |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
236 |
if (!vdev_dtl_contains(&mc->mc_vd->vdev_dtl_map, txg, 1)) |
789 | 237 |
return (c); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
238 |
mc->mc_error = ESTALE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
239 |
mc->mc_skipped = 1; |
789 | 240 |
} |
241 |
||
242 |
/* |
|
243 |
* Every device is either missing or has this txg in its DTL. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
244 |
* Look for any child we haven't already tried before giving up. |
789 | 245 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
246 |
for (c = 0; c < mm->mm_children; c++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
247 |
if (!mm->mm_child[c].mc_tried) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
248 |
return (c); |
789 | 249 |
|
250 |
/* |
|
251 |
* Every child failed. There's no place left to look. |
|
252 |
*/ |
|
253 |
return (-1); |
|
254 |
} |
|
255 |
||
256 |
static void |
|
257 |
vdev_mirror_io_start(zio_t *zio) |
|
258 |
{ |
|
259 |
mirror_map_t *mm; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
260 |
mirror_child_t *mc; |
789 | 261 |
int c, children; |
262 |
||
263 |
mm = vdev_mirror_map_alloc(zio); |
|
264 |
||
265 |
if (zio->io_type == ZIO_TYPE_READ) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
266 |
if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) { |
789 | 267 |
/* |
268 |
* For scrubbing reads we need to allocate a read |
|
269 |
* buffer for each child and issue reads to all |
|
270 |
* children. If any child succeeds, it will copy its |
|
271 |
* data into zio->io_data in vdev_mirror_scrub_done. |
|
272 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
273 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
274 |
mc = &mm->mm_child[c]; |
789 | 275 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
276 |
mc->mc_vd, mc->mc_offset, |
789 | 277 |
zio_buf_alloc(zio->io_size), zio->io_size, |
278 |
zio->io_type, zio->io_priority, |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
279 |
ZIO_FLAG_CANFAIL, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
280 |
vdev_mirror_scrub_done, mc)); |
789 | 281 |
} |
282 |
zio_wait_children_done(zio); |
|
283 |
return; |
|
284 |
} |
|
285 |
/* |
|
286 |
* For normal reads just pick one child. |
|
287 |
*/ |
|
288 |
c = vdev_mirror_child_select(zio); |
|
289 |
children = (c >= 0); |
|
290 |
} else { |
|
291 |
ASSERT(zio->io_type == ZIO_TYPE_WRITE); |
|
292 |
||
293 |
/* |
|
294 |
* If this is a resilvering I/O to a replacing vdev, |
|
295 |
* only the last child should be written -- unless the |
|
296 |
* first child happens to have a DTL entry here as well. |
|
297 |
* All other writes go to all children. |
|
298 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
299 |
if ((zio->io_flags & ZIO_FLAG_RESILVER) && mm->mm_replacing && |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
300 |
!vdev_dtl_contains(&mm->mm_child[0].mc_vd->vdev_dtl_map, |
789 | 301 |
zio->io_txg, 1)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
302 |
c = mm->mm_children - 1; |
789 | 303 |
children = 1; |
304 |
} else { |
|
305 |
c = 0; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
306 |
children = mm->mm_children; |
789 | 307 |
} |
308 |
} |
|
309 |
||
310 |
while (children--) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
311 |
mc = &mm->mm_child[c]; |
789 | 312 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
313 |
mc->mc_vd, mc->mc_offset, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
314 |
zio->io_data, zio->io_size, zio->io_type, zio->io_priority, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
315 |
ZIO_FLAG_CANFAIL, vdev_mirror_child_done, mc)); |
789 | 316 |
c++; |
317 |
} |
|
318 |
||
319 |
zio_wait_children_done(zio); |
|
320 |
} |
|
321 |
||
322 |
static void |
|
323 |
vdev_mirror_io_done(zio_t *zio) |
|
324 |
{ |
|
325 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
326 |
mirror_child_t *mc; |
789 | 327 |
int c; |
328 |
int good_copies = 0; |
|
329 |
int unexpected_errors = 0; |
|
330 |
||
331 |
zio->io_error = 0; |
|
332 |
zio->io_numerrors = 0; |
|
333 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
334 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
335 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
336 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
337 |
if (mc->mc_tried && mc->mc_error == 0) { |
789 | 338 |
good_copies++; |
339 |
continue; |
|
340 |
} |
|
341 |
||
342 |
/* |
|
343 |
* We preserve any EIOs because those may be worth retrying; |
|
344 |
* whereas ECKSUM and ENXIO are more likely to be persistent. |
|
345 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
346 |
if (mc->mc_error) { |
789 | 347 |
if (zio->io_error != EIO) |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
348 |
zio->io_error = mc->mc_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
349 |
if (!mc->mc_skipped) |
789 | 350 |
unexpected_errors++; |
351 |
zio->io_numerrors++; |
|
352 |
} |
|
353 |
} |
|
354 |
||
355 |
if (zio->io_type == ZIO_TYPE_WRITE) { |
|
356 |
/* |
|
357 |
* XXX -- for now, treat partial writes as success. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
358 |
* XXX -- For a replacing vdev, we need to make sure the |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
359 |
* new child succeeds. |
789 | 360 |
*/ |
361 |
/* XXPOLICY */ |
|
362 |
if (good_copies != 0) |
|
363 |
zio->io_error = 0; |
|
364 |
vdev_mirror_map_free(zio); |
|
365 |
zio_next_stage(zio); |
|
366 |
return; |
|
367 |
} |
|
368 |
||
369 |
ASSERT(zio->io_type == ZIO_TYPE_READ); |
|
370 |
||
371 |
/* |
|
372 |
* If we don't have a good copy yet, keep trying other children. |
|
373 |
*/ |
|
374 |
/* XXPOLICY */ |
|
375 |
if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
376 |
ASSERT(c >= 0 && c < mm->mm_children); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
377 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
378 |
dprintf("retrying i/o (err=%d) on child %s\n", |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
379 |
zio->io_error, vdev_description(mc->mc_vd)); |
789 | 380 |
zio->io_error = 0; |
381 |
zio_vdev_io_redone(zio); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
382 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
383 |
mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, |
789 | 384 |
ZIO_TYPE_READ, zio->io_priority, ZIO_FLAG_CANFAIL, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
385 |
vdev_mirror_child_done, mc)); |
789 | 386 |
zio_wait_children_done(zio); |
387 |
return; |
|
388 |
} |
|
389 |
||
390 |
/* XXPOLICY */ |
|
391 |
if (good_copies) |
|
392 |
zio->io_error = 0; |
|
393 |
else |
|
394 |
ASSERT(zio->io_error != 0); |
|
395 |
||
396 |
if (good_copies && (spa_mode & FWRITE) && |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
397 |
(unexpected_errors || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
398 |
(zio->io_flags & ZIO_FLAG_RESILVER) || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
399 |
((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { |
1732 | 400 |
zio_t *rio; |
401 |
||
789 | 402 |
/* |
403 |
* Use the good data we have in hand to repair damaged children. |
|
1732 | 404 |
* |
405 |
* We issue all repair I/Os as children of 'rio' to arrange |
|
406 |
* that vdev_mirror_map_free(zio) will be invoked after all |
|
407 |
* repairs complete, but before we advance to the next stage. |
|
789 | 408 |
*/ |
1732 | 409 |
rio = zio_null(zio, zio->io_spa, |
410 |
vdev_mirror_repair_done, zio, ZIO_FLAG_CANFAIL); |
|
411 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
412 |
for (c = 0; c < mm->mm_children; c++) { |
789 | 413 |
/* |
414 |
* Don't rewrite known good children. |
|
415 |
* Not only is it unnecessary, it could |
|
416 |
* actually be harmful: if the system lost |
|
417 |
* power while rewriting the only good copy, |
|
418 |
* there would be no good copies left! |
|
419 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
420 |
mc = &mm->mm_child[c]; |
789 | 421 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
422 |
if (mc->mc_error == 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
423 |
if (mc->mc_tried) |
789 | 424 |
continue; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
425 |
if (!(zio->io_flags & ZIO_FLAG_SCRUB) && |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
426 |
!vdev_dtl_contains(&mc->mc_vd->vdev_dtl_map, |
789 | 427 |
zio->io_txg, 1)) |
428 |
continue; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
429 |
mc->mc_error = ESTALE; |
789 | 430 |
} |
431 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
432 |
dprintf("resilvered %s @ 0x%llx error %d\n", |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
433 |
vdev_description(mc->mc_vd), mc->mc_offset, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
434 |
mc->mc_error); |
789 | 435 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
436 |
zio_nowait(zio_vdev_child_io(rio, zio->io_bp, mc->mc_vd, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
437 |
mc->mc_offset, zio->io_data, zio->io_size, |
789 | 438 |
ZIO_TYPE_WRITE, zio->io_priority, |
439 |
ZIO_FLAG_IO_REPAIR | ZIO_FLAG_CANFAIL | |
|
440 |
ZIO_FLAG_DONT_PROPAGATE, NULL, NULL)); |
|
441 |
} |
|
1732 | 442 |
|
443 |
zio_nowait(rio); |
|
444 |
zio_wait_children_done(zio); |
|
445 |
return; |
|
789 | 446 |
} |
447 |
||
448 |
vdev_mirror_map_free(zio); |
|
449 |
zio_next_stage(zio); |
|
450 |
} |
|
451 |
||
452 |
static void |
|
453 |
vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) |
|
454 |
{ |
|
455 |
if (faulted == vd->vdev_children) |
|
1544 | 456 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
457 |
VDEV_AUX_NO_REPLICAS); |
|
789 | 458 |
else if (degraded + faulted != 0) |
1544 | 459 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); |
789 | 460 |
else |
1544 | 461 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); |
789 | 462 |
} |
463 |
||
464 |
vdev_ops_t vdev_mirror_ops = { |
|
465 |
vdev_mirror_open, |
|
466 |
vdev_mirror_close, |
|
5329 | 467 |
NULL, |
789 | 468 |
vdev_default_asize, |
469 |
vdev_mirror_io_start, |
|
470 |
vdev_mirror_io_done, |
|
471 |
vdev_mirror_state_change, |
|
472 |
VDEV_TYPE_MIRROR, /* name of this vdev type */ |
|
473 |
B_FALSE /* not a leaf vdev */ |
|
474 |
}; |
|
475 |
||
476 |
vdev_ops_t vdev_replacing_ops = { |
|
477 |
vdev_mirror_open, |
|
478 |
vdev_mirror_close, |
|
5329 | 479 |
NULL, |
789 | 480 |
vdev_default_asize, |
481 |
vdev_mirror_io_start, |
|
482 |
vdev_mirror_io_done, |
|
483 |
vdev_mirror_state_change, |
|
484 |
VDEV_TYPE_REPLACING, /* name of this vdev type */ |
|
485 |
B_FALSE /* not a leaf vdev */ |
|
486 |
}; |
|
2082 | 487 |
|
488 |
vdev_ops_t vdev_spare_ops = { |
|
489 |
vdev_mirror_open, |
|
490 |
vdev_mirror_close, |
|
5329 | 491 |
NULL, |
2082 | 492 |
vdev_default_asize, |
493 |
vdev_mirror_io_start, |
|
494 |
vdev_mirror_io_done, |
|
495 |
vdev_mirror_state_change, |
|
496 |
VDEV_TYPE_SPARE, /* name of this vdev type */ |
|
497 |
B_FALSE /* not a leaf vdev */ |
|
498 |
}; |