author | Chris Kirby <Chris.Kirby@oracle.com> |
Thu, 10 Jun 2010 15:46:47 -0600 | |
changeset 12605 | 6790e683d5a5 |
parent 11958 | 575ffe1e978d |
child 13570 | 3411fd5f1589 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
22 |
* Copyright 2010 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#include <sys/zfs_context.h> |
|
27 |
#include <sys/spa.h> |
|
28 |
#include <sys/vdev_impl.h> |
|
29 |
#include <sys/zio.h> |
|
30 |
#include <sys/fs/zfs.h> |
|
31 |
||
32 |
/* |
|
33 |
* Virtual device vector for mirroring. |
|
34 |
*/ |
|
35 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
36 |
typedef struct mirror_child { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
37 |
vdev_t *mc_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
38 |
uint64_t mc_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
39 |
int mc_error; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
40 |
uint8_t mc_tried; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
41 |
uint8_t mc_skipped; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
42 |
uint8_t mc_speculative; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
43 |
} mirror_child_t; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
44 |
|
789 | 45 |
typedef struct mirror_map { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
46 |
int mm_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
47 |
int mm_replacing; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
48 |
int mm_preferred; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
49 |
int mm_root; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
50 |
mirror_child_t mm_child[1]; |
789 | 51 |
} mirror_map_t; |
52 |
||
2391 | 53 |
/*
 * Right-shift applied to zio->io_offset by vdev_mirror_map_alloc() when it
 * picks the preferred child of a mirror that is not replacing: all offsets
 * inside the same aligned span of 2^vdev_mirror_shift offset units select
 * the same child.
 */
int vdev_mirror_shift = 21;
54 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
55 |
static void |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
56 |
vdev_mirror_map_free(zio_t *zio) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
57 |
{ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
58 |
mirror_map_t *mm = zio->io_vsd; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
59 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
60 |
kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children])); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
61 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
62 |
|
10614
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
63 |
static const zio_vsd_ops_t vdev_mirror_vsd_ops = { |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
64 |
vdev_mirror_map_free, |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
65 |
zio_vsd_default_cksum_report |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
66 |
}; |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
67 |
|
789 | 68 |
static mirror_map_t * |
69 |
vdev_mirror_map_alloc(zio_t *zio) |
|
70 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
71 |
mirror_map_t *mm = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
72 |
mirror_child_t *mc; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
73 |
vdev_t *vd = zio->io_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
74 |
int c, d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
75 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
76 |
if (vd == NULL) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
77 |
dva_t *dva = zio->io_bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
78 |
spa_t *spa = zio->io_spa; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
79 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
80 |
c = BP_GET_NDVAS(zio->io_bp); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
81 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
82 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
83 |
mm->mm_children = c; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
84 |
mm->mm_replacing = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
85 |
mm->mm_preferred = spa_get_random(c); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
86 |
mm->mm_root = B_TRUE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
87 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
88 |
/* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
89 |
* Check the other, lower-index DVAs to see if they're on |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
90 |
* the same vdev as the child we picked. If they are, use |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
91 |
* them since they are likely to have been allocated from |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
92 |
* the primary metaslab in use at the time, and hence are |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
93 |
* more likely to have locality with single-copy data. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
94 |
*/ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
95 |
for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
96 |
if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c])) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
97 |
mm->mm_preferred = d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
98 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
99 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
100 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
101 |
mc = &mm->mm_child[c]; |
2082 | 102 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
103 |
mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
104 |
mc->mc_offset = DVA_GET_OFFSET(&dva[c]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
105 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
106 |
} else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
107 |
c = vd->vdev_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
108 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
109 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
110 |
mm->mm_children = c; |
2082 | 111 |
mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || |
112 |
vd->vdev_ops == &vdev_spare_ops); |
|
2391 | 113 |
mm->mm_preferred = mm->mm_replacing ? 0 : |
114 |
(zio->io_offset >> vdev_mirror_shift) % c; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
115 |
mm->mm_root = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
116 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
117 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
118 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
119 |
mc->mc_vd = vd->vdev_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
120 |
mc->mc_offset = zio->io_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
121 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
122 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
123 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
124 |
zio->io_vsd = mm; |
10614
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
125 |
zio->io_vsd_ops = &vdev_mirror_vsd_ops; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
126 |
return (mm); |
789 | 127 |
} |
128 |
||
129 |
static int |
|
130 |
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) |
|
131 |
{ |
|
132 |
int numerrors = 0; |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
133 |
int lasterror = 0; |
789 | 134 |
|
135 |
if (vd->vdev_children == 0) { |
|
136 |
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; |
|
137 |
return (EINVAL); |
|
138 |
} |
|
139 |
||
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
140 |
vdev_open_children(vd); |
789 | 141 |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
142 |
for (int c = 0; c < vd->vdev_children; c++) { |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
143 |
vdev_t *cvd = vd->vdev_child[c]; |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
144 |
|
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
145 |
if (cvd->vdev_open_error) { |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
146 |
lasterror = cvd->vdev_open_error; |
789 | 147 |
numerrors++; |
148 |
continue; |
|
149 |
} |
|
150 |
||
151 |
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; |
|
1732 | 152 |
*ashift = MAX(*ashift, cvd->vdev_ashift); |
789 | 153 |
} |
154 |
||
155 |
if (numerrors == vd->vdev_children) { |
|
156 |
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; |
|
157 |
return (lasterror); |
|
158 |
} |
|
159 |
||
160 |
return (0); |
|
161 |
} |
|
162 |
||
163 |
static void |
|
164 |
vdev_mirror_close(vdev_t *vd) |
|
165 |
{ |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
166 |
for (int c = 0; c < vd->vdev_children; c++) |
789 | 167 |
vdev_close(vd->vdev_child[c]); |
168 |
} |
|
169 |
||
170 |
static void |
|
171 |
vdev_mirror_child_done(zio_t *zio) |
|
172 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
173 |
mirror_child_t *mc = zio->io_private; |
789 | 174 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
175 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
176 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
177 |
mc->mc_skipped = 0; |
789 | 178 |
} |
179 |
||
180 |
static void |
|
181 |
vdev_mirror_scrub_done(zio_t *zio) |
|
182 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
183 |
mirror_child_t *mc = zio->io_private; |
789 | 184 |
|
185 |
if (zio->io_error == 0) { |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
186 |
zio_t *pio; |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
187 |
|
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
188 |
mutex_enter(&zio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
189 |
while ((pio = zio_walk_parents(zio)) != NULL) { |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
190 |
mutex_enter(&pio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
191 |
ASSERT3U(zio->io_size, >=, pio->io_size); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
192 |
bcopy(zio->io_data, pio->io_data, pio->io_size); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
193 |
mutex_exit(&pio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
194 |
} |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
195 |
mutex_exit(&zio->io_lock); |
789 | 196 |
} |
197 |
||
198 |
zio_buf_free(zio->io_data, zio->io_size); |
|
199 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
200 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
201 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
202 |
mc->mc_skipped = 0; |
789 | 203 |
} |
204 |
||
205 |
/* |
|
206 |
* Try to find a child whose DTL doesn't contain the block we want to read. |
|
207 |
* If we can't, try the read on any vdev we haven't already tried. |
|
208 |
*/ |
|
209 |
static int |
|
210 |
vdev_mirror_child_select(zio_t *zio) |
|
211 |
{ |
|
212 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
213 |
mirror_child_t *mc; |
789 | 214 |
uint64_t txg = zio->io_txg; |
215 |
int i, c; |
|
216 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10614
diff
changeset
|
217 |
ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); |
789 | 218 |
|
219 |
/* |
|
220 |
* Try to find a child whose DTL doesn't contain the block to read. |
|
221 |
* If a child is known to be completely inaccessible (indicated by |
|
5329 | 222 |
* vdev_readable() returning B_FALSE), don't even try. |
789 | 223 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
224 |
for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
225 |
if (c >= mm->mm_children) |
789 | 226 |
c = 0; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
227 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
228 |
if (mc->mc_tried || mc->mc_skipped) |
789 | 229 |
continue; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
230 |
if (!vdev_readable(mc->mc_vd)) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
231 |
mc->mc_error = ENXIO; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
232 |
mc->mc_tried = 1; /* don't even try */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
233 |
mc->mc_skipped = 1; |
789 | 234 |
continue; |
235 |
} |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
236 |
if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) |
789 | 237 |
return (c); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
238 |
mc->mc_error = ESTALE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
239 |
mc->mc_skipped = 1; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
240 |
mc->mc_speculative = 1; |
789 | 241 |
} |
242 |
||
243 |
/* |
|
244 |
* Every device is either missing or has this txg in its DTL. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
245 |
* Look for any child we haven't already tried before giving up. |
789 | 246 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
247 |
for (c = 0; c < mm->mm_children; c++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
248 |
if (!mm->mm_child[c].mc_tried) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
249 |
return (c); |
789 | 250 |
|
251 |
/* |
|
252 |
* Every child failed. There's no place left to look. |
|
253 |
*/ |
|
254 |
return (-1); |
|
255 |
} |
|
256 |
||
5530 | 257 |
static int |
789 | 258 |
vdev_mirror_io_start(zio_t *zio) |
259 |
{ |
|
260 |
mirror_map_t *mm; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
261 |
mirror_child_t *mc; |
789 | 262 |
int c, children; |
263 |
||
264 |
mm = vdev_mirror_map_alloc(zio); |
|
265 |
||
266 |
if (zio->io_type == ZIO_TYPE_READ) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
267 |
if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) { |
789 | 268 |
/* |
269 |
* For scrubbing reads we need to allocate a read |
|
270 |
* buffer for each child and issue reads to all |
|
271 |
* children. If any child succeeds, it will copy its |
|
272 |
* data into zio->io_data in vdev_mirror_scrub_done. |
|
273 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
274 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
275 |
mc = &mm->mm_child[c]; |
789 | 276 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
277 |
mc->mc_vd, mc->mc_offset, |
789 | 278 |
zio_buf_alloc(zio->io_size), zio->io_size, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
279 |
zio->io_type, zio->io_priority, 0, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
280 |
vdev_mirror_scrub_done, mc)); |
789 | 281 |
} |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
282 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 283 |
} |
284 |
/* |
|
285 |
* For normal reads just pick one child. |
|
286 |
*/ |
|
287 |
c = vdev_mirror_child_select(zio); |
|
288 |
children = (c >= 0); |
|
289 |
} else { |
|
290 |
ASSERT(zio->io_type == ZIO_TYPE_WRITE); |
|
291 |
||
292 |
/* |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
293 |
* Writes go to all children. |
789 | 294 |
*/ |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
295 |
c = 0; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
296 |
children = mm->mm_children; |
789 | 297 |
} |
298 |
||
299 |
while (children--) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
300 |
mc = &mm->mm_child[c]; |
789 | 301 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
302 |
mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
303 |
zio->io_type, zio->io_priority, 0, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
304 |
vdev_mirror_child_done, mc)); |
789 | 305 |
c++; |
306 |
} |
|
307 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
308 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 309 |
} |
310 |
||
5530 | 311 |
static int |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
312 |
vdev_mirror_worst_error(mirror_map_t *mm) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
313 |
{ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
314 |
int error[2] = { 0, 0 }; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
315 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
316 |
for (int c = 0; c < mm->mm_children; c++) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
317 |
mirror_child_t *mc = &mm->mm_child[c]; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
318 |
int s = mc->mc_speculative; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
319 |
error[s] = zio_worst_error(error[s], mc->mc_error); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
320 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
321 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
322 |
return (error[0] ? error[0] : error[1]); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
323 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
324 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
325 |
static void |
789 | 326 |
vdev_mirror_io_done(zio_t *zio) |
327 |
{ |
|
328 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
329 |
mirror_child_t *mc; |
789 | 330 |
int c; |
331 |
int good_copies = 0; |
|
332 |
int unexpected_errors = 0; |
|
333 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
334 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
335 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
336 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
337 |
if (mc->mc_error) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
338 |
if (!mc->mc_skipped) |
789 | 339 |
unexpected_errors++; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
340 |
} else if (mc->mc_tried) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
341 |
good_copies++; |
789 | 342 |
} |
343 |
} |
|
344 |
||
345 |
if (zio->io_type == ZIO_TYPE_WRITE) { |
|
346 |
/* |
|
347 |
* XXX -- for now, treat partial writes as success. |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
348 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
349 |
* Now that we support write reallocation, it would be better |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
350 |
* to treat partial failure as real failure unless there are |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
351 |
* no non-degraded top-level vdevs left, and not update DTLs |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
352 |
* if we intend to reallocate. |
789 | 353 |
*/ |
354 |
/* XXPOLICY */ |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
355 |
if (good_copies != mm->mm_children) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
356 |
/* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
357 |
* Always require at least one good copy. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
358 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
359 |
* For ditto blocks (io_vd == NULL), require |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
360 |
* all copies to be good. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
361 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
362 |
* XXX -- for replacing vdevs, there's no great answer. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
363 |
* If the old device is really dead, we may not even |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
364 |
* be able to access it -- so we only want to |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
365 |
* require good writes to the new device. But if |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
366 |
* the new device turns out to be flaky, we want |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
367 |
* to be able to detach it -- which requires all |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
368 |
* writes to the old device to have succeeded. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
369 |
*/ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
370 |
if (good_copies == 0 || zio->io_vd == NULL) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
371 |
zio->io_error = vdev_mirror_worst_error(mm); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
372 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
373 |
return; |
789 | 374 |
} |
375 |
||
376 |
ASSERT(zio->io_type == ZIO_TYPE_READ); |
|
377 |
||
378 |
/* |
|
379 |
* If we don't have a good copy yet, keep trying other children. |
|
380 |
*/ |
|
381 |
/* XXPOLICY */ |
|
382 |
if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
383 |
ASSERT(c >= 0 && c < mm->mm_children); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
384 |
mc = &mm->mm_child[c]; |
789 | 385 |
zio_vdev_io_redone(zio); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
386 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
387 |
mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
388 |
ZIO_TYPE_READ, zio->io_priority, 0, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
389 |
vdev_mirror_child_done, mc)); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
390 |
return; |
789 | 391 |
} |
392 |
||
393 |
/* XXPOLICY */ |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
394 |
if (good_copies == 0) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
395 |
zio->io_error = vdev_mirror_worst_error(mm); |
789 | 396 |
ASSERT(zio->io_error != 0); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
397 |
} |
789 | 398 |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
399 |
if (good_copies && spa_writeable(zio->io_spa) && |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
400 |
(unexpected_errors || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
401 |
(zio->io_flags & ZIO_FLAG_RESILVER) || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
402 |
((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { |
789 | 403 |
/* |
404 |
* Use the good data we have in hand to repair damaged children. |
|
405 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
406 |
for (c = 0; c < mm->mm_children; c++) { |
789 | 407 |
/* |
408 |
* Don't rewrite known good children. |
|
409 |
* Not only is it unnecessary, it could |
|
410 |
* actually be harmful: if the system lost |
|
411 |
* power while rewriting the only good copy, |
|
412 |
* there would be no good copies left! |
|
413 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
414 |
mc = &mm->mm_child[c]; |
789 | 415 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
416 |
if (mc->mc_error == 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
417 |
if (mc->mc_tried) |
789 | 418 |
continue; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
419 |
if (!(zio->io_flags & ZIO_FLAG_SCRUB) && |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
420 |
!vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, |
789 | 421 |
zio->io_txg, 1)) |
422 |
continue; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
423 |
mc->mc_error = ESTALE; |
789 | 424 |
} |
425 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
426 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
427 |
mc->mc_vd, mc->mc_offset, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
428 |
zio->io_data, zio->io_size, |
789 | 429 |
ZIO_TYPE_WRITE, zio->io_priority, |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
430 |
ZIO_FLAG_IO_REPAIR | (unexpected_errors ? |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
431 |
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); |
789 | 432 |
} |
433 |
} |
|
434 |
} |
|
435 |
||
436 |
static void |
|
437 |
vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) |
|
438 |
{ |
|
439 |
if (faulted == vd->vdev_children) |
|
1544 | 440 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
441 |
VDEV_AUX_NO_REPLICAS); |
|
789 | 442 |
else if (degraded + faulted != 0) |
1544 | 443 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); |
789 | 444 |
else |
1544 | 445 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); |
789 | 446 |
} |
447 |
||
448 |
vdev_ops_t vdev_mirror_ops = { |
|
449 |
vdev_mirror_open, |
|
450 |
vdev_mirror_close, |
|
451 |
vdev_default_asize, |
|
452 |
vdev_mirror_io_start, |
|
453 |
vdev_mirror_io_done, |
|
454 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
455 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
456 |
NULL, |
789 | 457 |
VDEV_TYPE_MIRROR, /* name of this vdev type */ |
458 |
B_FALSE /* not a leaf vdev */ |
|
459 |
}; |
|
460 |
||
461 |
vdev_ops_t vdev_replacing_ops = { |
|
462 |
vdev_mirror_open, |
|
463 |
vdev_mirror_close, |
|
464 |
vdev_default_asize, |
|
465 |
vdev_mirror_io_start, |
|
466 |
vdev_mirror_io_done, |
|
467 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
468 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
469 |
NULL, |
789 | 470 |
VDEV_TYPE_REPLACING, /* name of this vdev type */ |
471 |
B_FALSE /* not a leaf vdev */ |
|
472 |
}; |
|
2082 | 473 |
|
474 |
vdev_ops_t vdev_spare_ops = { |
|
475 |
vdev_mirror_open, |
|
476 |
vdev_mirror_close, |
|
477 |
vdev_default_asize, |
|
478 |
vdev_mirror_io_start, |
|
479 |
vdev_mirror_io_done, |
|
480 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
481 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
482 |
NULL, |
2082 | 483 |
VDEV_TYPE_SPARE, /* name of this vdev type */ |
484 |
B_FALSE /* not a leaf vdev */ |
|
485 |
}; |