author | Matthew Ahrens <mahrens@delphix.com> |
Fri, 08 Mar 2013 10:41:28 -0800 | |
changeset 13980 | d7059eb1884c |
parent 13570 | 3411fd5f1589 |
child 14167 | dceb17481b99 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
22 |
* Copyright 2010 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
13570
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
26 |
/* |
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13570
diff
changeset
|
27 |
* Copyright (c) 2013 by Delphix. All rights reserved. |
13570
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
28 |
*/ |
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
29 |
|
789 | 30 |
#include <sys/zfs_context.h> |
31 |
#include <sys/spa.h> |
|
32 |
#include <sys/vdev_impl.h> |
|
33 |
#include <sys/zio.h> |
|
34 |
#include <sys/fs/zfs.h> |
|
35 |
||
36 |
/* |
|
37 |
* Virtual device vector for mirroring. |
|
38 |
*/ |
|
39 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
40 |
typedef struct mirror_child { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
41 |
vdev_t *mc_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
42 |
uint64_t mc_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
43 |
int mc_error; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
44 |
uint8_t mc_tried; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
45 |
uint8_t mc_skipped; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
46 |
uint8_t mc_speculative; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
47 |
} mirror_child_t; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
48 |
|
789 | 49 |
typedef struct mirror_map { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
50 |
int mm_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
51 |
int mm_replacing; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
52 |
int mm_preferred; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
53 |
int mm_root; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
54 |
mirror_child_t mm_child[1]; |
789 | 55 |
} mirror_map_t; |
56 |
||
2391 | 57 |
/*
 * log2 of the region size used to spread reads across the children of a
 * plain mirror: vdev_mirror_map_alloc() prefers child
 * (io_offset >> vdev_mirror_shift) % children.  The default of 21 makes
 * the preferred child change every 2MB of offset.
 */
int vdev_mirror_shift = 21;
58 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
59 |
static void |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
60 |
vdev_mirror_map_free(zio_t *zio) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
61 |
{ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
62 |
mirror_map_t *mm = zio->io_vsd; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
63 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
64 |
kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children])); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
65 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
66 |
|
10614
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
67 |
static const zio_vsd_ops_t vdev_mirror_vsd_ops = { |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
68 |
vdev_mirror_map_free, |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
69 |
zio_vsd_default_cksum_report |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
70 |
}; |
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
71 |
|
789 | 72 |
static mirror_map_t * |
73 |
vdev_mirror_map_alloc(zio_t *zio) |
|
74 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
75 |
mirror_map_t *mm = NULL; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
76 |
mirror_child_t *mc; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
77 |
vdev_t *vd = zio->io_vd; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
78 |
int c, d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
79 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
80 |
if (vd == NULL) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
81 |
dva_t *dva = zio->io_bp->blk_dva; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
82 |
spa_t *spa = zio->io_spa; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
83 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
84 |
c = BP_GET_NDVAS(zio->io_bp); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
85 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
86 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
87 |
mm->mm_children = c; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
88 |
mm->mm_replacing = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
89 |
mm->mm_preferred = spa_get_random(c); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
90 |
mm->mm_root = B_TRUE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
91 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
92 |
/* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
93 |
* Check the other, lower-index DVAs to see if they're on |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
94 |
* the same vdev as the child we picked. If they are, use |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
95 |
* them since they are likely to have been allocated from |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
96 |
* the primary metaslab in use at the time, and hence are |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
97 |
* more likely to have locality with single-copy data. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
98 |
*/ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
99 |
for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
100 |
if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c])) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
101 |
mm->mm_preferred = d; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
102 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
103 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
104 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
105 |
mc = &mm->mm_child[c]; |
2082 | 106 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
107 |
mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
108 |
mc->mc_offset = DVA_GET_OFFSET(&dva[c]); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
109 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
110 |
} else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
111 |
c = vd->vdev_children; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
112 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
113 |
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
114 |
mm->mm_children = c; |
2082 | 115 |
mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || |
116 |
vd->vdev_ops == &vdev_spare_ops); |
|
2391 | 117 |
mm->mm_preferred = mm->mm_replacing ? 0 : |
118 |
(zio->io_offset >> vdev_mirror_shift) % c; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
119 |
mm->mm_root = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
120 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
121 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
122 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
123 |
mc->mc_vd = vd->vdev_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
124 |
mc->mc_offset = zio->io_offset; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
125 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
126 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
127 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
128 |
zio->io_vsd = mm; |
10614
4f397871da47
PSARC 2009/497 zfs checksum ereport payload additions
Jonathan Adams <Jonathan.Adams@Sun.COM>
parents:
9846
diff
changeset
|
129 |
zio->io_vsd_ops = &vdev_mirror_vsd_ops; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
130 |
return (mm); |
789 | 131 |
} |
132 |
||
133 |
static int |
|
13570
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
134 |
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, |
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
135 |
uint64_t *ashift) |
789 | 136 |
{ |
137 |
int numerrors = 0; |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
138 |
int lasterror = 0; |
789 | 139 |
|
140 |
if (vd->vdev_children == 0) { |
|
141 |
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13570
diff
changeset
|
142 |
return (SET_ERROR(EINVAL)); |
789 | 143 |
} |
144 |
||
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
145 |
vdev_open_children(vd); |
789 | 146 |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
147 |
for (int c = 0; c < vd->vdev_children; c++) { |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
148 |
vdev_t *cvd = vd->vdev_child[c]; |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
149 |
|
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
150 |
if (cvd->vdev_open_error) { |
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
151 |
lasterror = cvd->vdev_open_error; |
789 | 152 |
numerrors++; |
153 |
continue; |
|
154 |
} |
|
155 |
||
156 |
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; |
|
13570
3411fd5f1589
1948 zpool list should show more detailed pool information
George Wilson <George.Wilson@delphix.com>
parents:
11958
diff
changeset
|
157 |
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; |
1732 | 158 |
*ashift = MAX(*ashift, cvd->vdev_ashift); |
789 | 159 |
} |
160 |
||
161 |
if (numerrors == vd->vdev_children) { |
|
162 |
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; |
|
163 |
return (lasterror); |
|
164 |
} |
|
165 |
||
166 |
return (0); |
|
167 |
} |
|
168 |
||
169 |
static void |
|
170 |
vdev_mirror_close(vdev_t *vd) |
|
171 |
{ |
|
9846
6527c7b4a92e
6566744 vdev_open() should be done in parallel
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
8632
diff
changeset
|
172 |
for (int c = 0; c < vd->vdev_children; c++) |
789 | 173 |
vdev_close(vd->vdev_child[c]); |
174 |
} |
|
175 |
||
176 |
static void |
|
177 |
vdev_mirror_child_done(zio_t *zio) |
|
178 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
179 |
mirror_child_t *mc = zio->io_private; |
789 | 180 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
181 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
182 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
183 |
mc->mc_skipped = 0; |
789 | 184 |
} |
185 |
||
186 |
static void |
|
187 |
vdev_mirror_scrub_done(zio_t *zio) |
|
188 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
189 |
mirror_child_t *mc = zio->io_private; |
789 | 190 |
|
191 |
if (zio->io_error == 0) { |
|
8632
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
192 |
zio_t *pio; |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
193 |
|
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
194 |
mutex_enter(&zio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
195 |
while ((pio = zio_walk_parents(zio)) != NULL) { |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
196 |
mutex_enter(&pio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
197 |
ASSERT3U(zio->io_size, >=, pio->io_size); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
198 |
bcopy(zio->io_data, pio->io_data, pio->io_size); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
199 |
mutex_exit(&pio->io_lock); |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
200 |
} |
36ef517870a3
6798384 It can take a village to raise a zio
Bill Moore <Bill.Moore@Sun.COM>
parents:
8241
diff
changeset
|
201 |
mutex_exit(&zio->io_lock); |
789 | 202 |
} |
203 |
||
204 |
zio_buf_free(zio->io_data, zio->io_size); |
|
205 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
206 |
mc->mc_error = zio->io_error; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
207 |
mc->mc_tried = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
208 |
mc->mc_skipped = 0; |
789 | 209 |
} |
210 |
||
211 |
/* |
|
212 |
* Try to find a child whose DTL doesn't contain the block we want to read. |
|
213 |
* If we can't, try the read on any vdev we haven't already tried. |
|
214 |
*/ |
|
215 |
static int |
|
216 |
vdev_mirror_child_select(zio_t *zio) |
|
217 |
{ |
|
218 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
219 |
mirror_child_t *mc; |
789 | 220 |
uint64_t txg = zio->io_txg; |
221 |
int i, c; |
|
222 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10614
diff
changeset
|
223 |
ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); |
789 | 224 |
|
225 |
/* |
|
226 |
* Try to find a child whose DTL doesn't contain the block to read. |
|
227 |
* If a child is known to be completely inaccessible (indicated by |
|
5329 | 228 |
* vdev_readable() returning B_FALSE), don't even try. |
789 | 229 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
230 |
for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
231 |
if (c >= mm->mm_children) |
789 | 232 |
c = 0; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
233 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
234 |
if (mc->mc_tried || mc->mc_skipped) |
789 | 235 |
continue; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
236 |
if (!vdev_readable(mc->mc_vd)) { |
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13570
diff
changeset
|
237 |
mc->mc_error = SET_ERROR(ENXIO); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
238 |
mc->mc_tried = 1; /* don't even try */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
239 |
mc->mc_skipped = 1; |
789 | 240 |
continue; |
241 |
} |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
242 |
if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) |
789 | 243 |
return (c); |
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13570
diff
changeset
|
244 |
mc->mc_error = SET_ERROR(ESTALE); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
245 |
mc->mc_skipped = 1; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
246 |
mc->mc_speculative = 1; |
789 | 247 |
} |
248 |
||
249 |
/* |
|
250 |
* Every device is either missing or has this txg in its DTL. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
251 |
* Look for any child we haven't already tried before giving up. |
789 | 252 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
253 |
for (c = 0; c < mm->mm_children; c++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
254 |
if (!mm->mm_child[c].mc_tried) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
255 |
return (c); |
789 | 256 |
|
257 |
/* |
|
258 |
* Every child failed. There's no place left to look. |
|
259 |
*/ |
|
260 |
return (-1); |
|
261 |
} |
|
262 |
||
5530 | 263 |
static int |
789 | 264 |
vdev_mirror_io_start(zio_t *zio) |
265 |
{ |
|
266 |
mirror_map_t *mm; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
267 |
mirror_child_t *mc; |
789 | 268 |
int c, children; |
269 |
||
270 |
mm = vdev_mirror_map_alloc(zio); |
|
271 |
||
272 |
if (zio->io_type == ZIO_TYPE_READ) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
273 |
if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) { |
789 | 274 |
/* |
275 |
* For scrubbing reads we need to allocate a read |
|
276 |
* buffer for each child and issue reads to all |
|
277 |
* children. If any child succeeds, it will copy its |
|
278 |
* data into zio->io_data in vdev_mirror_scrub_done. |
|
279 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
280 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
281 |
mc = &mm->mm_child[c]; |
789 | 282 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
283 |
mc->mc_vd, mc->mc_offset, |
789 | 284 |
zio_buf_alloc(zio->io_size), zio->io_size, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
285 |
zio->io_type, zio->io_priority, 0, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
286 |
vdev_mirror_scrub_done, mc)); |
789 | 287 |
} |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
288 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 289 |
} |
290 |
/* |
|
291 |
* For normal reads just pick one child. |
|
292 |
*/ |
|
293 |
c = vdev_mirror_child_select(zio); |
|
294 |
children = (c >= 0); |
|
295 |
} else { |
|
296 |
ASSERT(zio->io_type == ZIO_TYPE_WRITE); |
|
297 |
||
298 |
/* |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
299 |
* Writes go to all children. |
789 | 300 |
*/ |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
301 |
c = 0; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
302 |
children = mm->mm_children; |
789 | 303 |
} |
304 |
||
305 |
while (children--) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
306 |
mc = &mm->mm_child[c]; |
789 | 307 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
308 |
mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
309 |
zio->io_type, zio->io_priority, 0, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
310 |
vdev_mirror_child_done, mc)); |
789 | 311 |
c++; |
312 |
} |
|
313 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
314 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 315 |
} |
316 |
||
5530 | 317 |
static int |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
318 |
vdev_mirror_worst_error(mirror_map_t *mm) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
319 |
{ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
320 |
int error[2] = { 0, 0 }; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
321 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
322 |
for (int c = 0; c < mm->mm_children; c++) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
323 |
mirror_child_t *mc = &mm->mm_child[c]; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
324 |
int s = mc->mc_speculative; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
325 |
error[s] = zio_worst_error(error[s], mc->mc_error); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
326 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
327 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
328 |
return (error[0] ? error[0] : error[1]); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
329 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
330 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
331 |
static void |
789 | 332 |
vdev_mirror_io_done(zio_t *zio) |
333 |
{ |
|
334 |
mirror_map_t *mm = zio->io_vsd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
335 |
mirror_child_t *mc; |
789 | 336 |
int c; |
337 |
int good_copies = 0; |
|
338 |
int unexpected_errors = 0; |
|
339 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
340 |
for (c = 0; c < mm->mm_children; c++) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
341 |
mc = &mm->mm_child[c]; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
342 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
343 |
if (mc->mc_error) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
344 |
if (!mc->mc_skipped) |
789 | 345 |
unexpected_errors++; |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
346 |
} else if (mc->mc_tried) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
347 |
good_copies++; |
789 | 348 |
} |
349 |
} |
|
350 |
||
351 |
if (zio->io_type == ZIO_TYPE_WRITE) { |
|
352 |
/* |
|
353 |
* XXX -- for now, treat partial writes as success. |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
354 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
355 |
* Now that we support write reallocation, it would be better |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
356 |
* to treat partial failure as real failure unless there are |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
357 |
* no non-degraded top-level vdevs left, and not update DTLs |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
358 |
* if we intend to reallocate. |
789 | 359 |
*/ |
360 |
/* XXPOLICY */ |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
361 |
if (good_copies != mm->mm_children) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
362 |
/* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
363 |
* Always require at least one good copy. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
364 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
365 |
* For ditto blocks (io_vd == NULL), require |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
366 |
* all copies to be good. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
367 |
* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
368 |
* XXX -- for replacing vdevs, there's no great answer. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
369 |
* If the old device is really dead, we may not even |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
370 |
* be able to access it -- so we only want to |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
371 |
* require good writes to the new device. But if |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
372 |
* the new device turns out to be flaky, we want |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
373 |
* to be able to detach it -- which requires all |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
374 |
* writes to the old device to have succeeded. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
375 |
*/ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
376 |
if (good_copies == 0 || zio->io_vd == NULL) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
377 |
zio->io_error = vdev_mirror_worst_error(mm); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
378 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
379 |
return; |
789 | 380 |
} |
381 |
||
382 |
ASSERT(zio->io_type == ZIO_TYPE_READ); |
|
383 |
||
384 |
/* |
|
385 |
* If we don't have a good copy yet, keep trying other children. |
|
386 |
*/ |
|
387 |
/* XXPOLICY */ |
|
388 |
if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
389 |
ASSERT(c >= 0 && c < mm->mm_children); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
390 |
mc = &mm->mm_child[c]; |
789 | 391 |
zio_vdev_io_redone(zio); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
392 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
393 |
mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
394 |
ZIO_TYPE_READ, zio->io_priority, 0, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
395 |
vdev_mirror_child_done, mc)); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
396 |
return; |
789 | 397 |
} |
398 |
||
399 |
/* XXPOLICY */ |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
400 |
if (good_copies == 0) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
401 |
zio->io_error = vdev_mirror_worst_error(mm); |
789 | 402 |
ASSERT(zio->io_error != 0); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
403 |
} |
789 | 404 |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
405 |
if (good_copies && spa_writeable(zio->io_spa) && |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
406 |
(unexpected_errors || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
407 |
(zio->io_flags & ZIO_FLAG_RESILVER) || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
408 |
((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { |
789 | 409 |
/* |
410 |
* Use the good data we have in hand to repair damaged children. |
|
411 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
412 |
for (c = 0; c < mm->mm_children; c++) { |
789 | 413 |
/* |
414 |
* Don't rewrite known good children. |
|
415 |
* Not only is it unnecessary, it could |
|
416 |
* actually be harmful: if the system lost |
|
417 |
* power while rewriting the only good copy, |
|
418 |
* there would be no good copies left! |
|
419 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
420 |
mc = &mm->mm_child[c]; |
789 | 421 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
422 |
if (mc->mc_error == 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
423 |
if (mc->mc_tried) |
789 | 424 |
continue; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
425 |
if (!(zio->io_flags & ZIO_FLAG_SCRUB) && |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
426 |
!vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, |
789 | 427 |
zio->io_txg, 1)) |
428 |
continue; |
|
13980
d7059eb1884c
3598 want to dtrace when errors are generated in zfs
Matthew Ahrens <mahrens@delphix.com>
parents:
13570
diff
changeset
|
429 |
mc->mc_error = SET_ERROR(ESTALE); |
789 | 430 |
} |
431 |
||
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
432 |
zio_nowait(zio_vdev_child_io(zio, zio->io_bp, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
433 |
mc->mc_vd, mc->mc_offset, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7473
diff
changeset
|
434 |
zio->io_data, zio->io_size, |
789 | 435 |
ZIO_TYPE_WRITE, zio->io_priority, |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
436 |
ZIO_FLAG_IO_REPAIR | (unexpected_errors ? |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7754
diff
changeset
|
437 |
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); |
789 | 438 |
} |
439 |
} |
|
440 |
} |
|
441 |
||
442 |
static void |
|
443 |
vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) |
|
444 |
{ |
|
445 |
if (faulted == vd->vdev_children) |
|
1544 | 446 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, |
447 |
VDEV_AUX_NO_REPLICAS); |
|
789 | 448 |
else if (degraded + faulted != 0) |
1544 | 449 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); |
789 | 450 |
else |
1544 | 451 |
vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); |
789 | 452 |
} |
453 |
||
454 |
vdev_ops_t vdev_mirror_ops = { |
|
455 |
vdev_mirror_open, |
|
456 |
vdev_mirror_close, |
|
457 |
vdev_default_asize, |
|
458 |
vdev_mirror_io_start, |
|
459 |
vdev_mirror_io_done, |
|
460 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
461 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
462 |
NULL, |
789 | 463 |
VDEV_TYPE_MIRROR, /* name of this vdev type */ |
464 |
B_FALSE /* not a leaf vdev */ |
|
465 |
}; |
|
466 |
||
467 |
vdev_ops_t vdev_replacing_ops = { |
|
468 |
vdev_mirror_open, |
|
469 |
vdev_mirror_close, |
|
470 |
vdev_default_asize, |
|
471 |
vdev_mirror_io_start, |
|
472 |
vdev_mirror_io_done, |
|
473 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
474 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
475 |
NULL, |
789 | 476 |
VDEV_TYPE_REPLACING, /* name of this vdev type */ |
477 |
B_FALSE /* not a leaf vdev */ |
|
478 |
}; |
|
2082 | 479 |
|
480 |
vdev_ops_t vdev_spare_ops = { |
|
481 |
vdev_mirror_open, |
|
482 |
vdev_mirror_close, |
|
483 |
vdev_default_asize, |
|
484 |
vdev_mirror_io_start, |
|
485 |
vdev_mirror_io_done, |
|
486 |
vdev_mirror_state_change, |
|
11958
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
487 |
NULL, |
575ffe1e978d
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
George Wilson <George.Wilson@Sun.COM>
parents:
10922
diff
changeset
|
488 |
NULL, |
2082 | 489 |
VDEV_TYPE_SPARE, /* name of this vdev type */ |
490 |
B_FALSE /* not a leaf vdev */ |
|
491 |
}; |