author | eschrock |
Sat, 08 Apr 2006 23:33:38 -0700 | |
changeset 1773 | d6e4f2855c14 |
parent 1544 | 938876158511 |
child 1955 | 24fe75aaee9a |
permissions | -rw-r--r-- |
1544 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License (the "License"). |
|
6 |
* You may not use this file except in compliance with the License. |
|
7 |
* |
|
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
|
23 |
* Use is subject to license terms. |
|
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/spa.h> |
|
29 |
#include <sys/spa_impl.h> |
|
30 |
#include <sys/vdev.h> |
|
31 |
#include <sys/vdev_impl.h> |
|
32 |
#include <sys/zio.h> |
|
33 |
||
34 |
#include <sys/fm/fs/zfs.h> |
|
35 |
#include <sys/fm/protocol.h> |
|
36 |
#include <sys/fm/util.h> |
|
37 |
#include <sys/sysevent.h> |
|
38 |
||
39 |
/* |
|
40 |
* This general routine is responsible for generating all the different ZFS |
|
41 |
* ereports. The payload is dependent on the class, and which arguments are |
|
42 |
* supplied to the function: |
|
43 |
* |
|
44 |
* EREPORT POOL VDEV IO |
|
45 |
* block X X X |
|
46 |
* data X X |
|
47 |
* device X X |
|
48 |
* pool X |
|
49 |
* |
|
50 |
* If we are in a loading state, all errors are chained together by the same |
|
51 |
* SPA-wide ENA. |
|
52 |
* |
|
53 |
* For isolated I/O requests, we get the ENA from the zio_t. The propagation |
|
54 |
* gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want |
|
55 |
* to chain together all ereports associated with a logical piece of data. For |
|
56 |
* read I/Os, there are basically three 'types' of I/O, which form a roughly |
|
57 |
* layered diagram: |
|
58 |
* |
|
59 |
* +---------------+ |
|
60 |
* | Aggregate I/O | No associated logical data or device |
|
61 |
* +---------------+ |
|
62 |
* | |
|
63 |
* V |
|
64 |
* +---------------+ Reads associated with a piece of logical data. |
|
65 |
* | Read I/O | This includes reads on behalf of RAID-Z, |
|
66 |
* +---------------+ mirrors, gang blocks, retries, etc. |
|
67 |
* | |
|
68 |
* V |
|
69 |
* +---------------+ Reads associated with a particular device, but |
|
70 |
* | Physical I/O | no logical data. Issued as part of vdev caching |
|
71 |
* +---------------+ and I/O aggregation. |
|
72 |
* |
|
73 |
* Note that 'physical I/O' here is not the same terminology as used in the rest |
|
74 |
* of ZIO. Typically, 'physical I/O' simply means that there is no attached |
|
75 |
* blockpointer. But I/O with no associated block pointer can still be related |
|
76 |
* to a logical piece of data (i.e. RAID-Z requests). |
|
77 |
* |
|
78 |
* Purely physical I/O always have unique ENAs. They are not related to a |
|
79 |
* particular piece of logical data, and therefore cannot be chained together. |
|
80 |
* We still generate an ereport, but the DE doesn't correlate it with any |
|
81 |
* logical piece of data. When such an I/O fails, the delegated I/O requests |
|
82 |
* will issue a retry, which will trigger the 'real' ereport with the correct |
|
83 |
* ENA. |
|
84 |
* |
|
85 |
* We keep track of the ENA for a ZIO chain through the 'io_logical' member. |
|
86 |
* When a new logical I/O is issued, we set this to point to itself. Child I/Os |
|
87 |
* then inherit this pointer, so that when it is first set subsequent failures |
|
88 |
* will use the same ENA. If a physical I/O is issued (by passing the |
|
89 |
* ZIO_FLAG_NOBOOKMARK flag), then this pointer is reset, guaranteeing that a |
|
90 |
* unique ENA will be generated. For an aggregate I/O, this pointer is set to |
|
91 |
* NULL, and no ereport will be generated (since it doesn't actually correspond |
|
92 |
* to any particular device or piece of data). |
|
93 |
*/ |
|
94 |
void |
|
95 |
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, |
|
96 |
uint64_t stateoroffset, uint64_t size) |
|
97 |
{ |
|
98 |
#ifdef _KERNEL |
|
99 |
nvlist_t *ereport, *detector; |
|
100 |
uint64_t ena; |
|
101 |
char class[64]; |
|
102 |
||
103 |
/* |
|
104 |
* If we are doing a spa_tryimport(), ignore errors. |
|
105 |
*/ |
|
106 |
if (spa->spa_load_state == SPA_LOAD_TRYIMPORT) |
|
107 |
return; |
|
108 |
||
109 |
/* |
|
110 |
* If we are in the middle of opening a pool, and the previous attempt |
|
111 |
* failed, don't bother logging any new ereports - we're just going to |
|
112 |
* get the same diagnosis anyway. |
|
113 |
*/ |
|
114 |
if (spa->spa_load_state != SPA_LOAD_NONE && |
|
115 |
spa->spa_last_open_failed) |
|
116 |
return; |
|
117 |
||
118 |
/* |
|
119 |
* Ignore any errors from I/Os that we are going to retry anyway - we |
|
120 |
* only generate errors from the final failure. |
|
121 |
*/ |
|
122 |
if (zio && zio_should_retry(zio)) |
|
123 |
return; |
|
124 |
||
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
125 |
/* |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
126 |
* If this is not a read or write zio, ignore the error. This can occur |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
127 |
* if the DKIOCFLUSHWRITECACHE ioctl fails. |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
128 |
*/ |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
129 |
if (zio && zio->io_type != ZIO_TYPE_READ && |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
130 |
zio->io_type != ZIO_TYPE_WRITE) |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
131 |
return; |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1544
diff
changeset
|
132 |
|
1544 | 133 |
if ((ereport = fm_nvlist_create(NULL)) == NULL) |
134 |
return; |
|
135 |
||
136 |
if ((detector = fm_nvlist_create(NULL)) == NULL) { |
|
137 |
fm_nvlist_destroy(ereport, FM_NVA_FREE); |
|
138 |
return; |
|
139 |
} |
|
140 |
||
141 |
/* |
|
142 |
* Serialize ereport generation |
|
143 |
*/ |
|
144 |
mutex_enter(&spa->spa_errlist_lock); |
|
145 |
||
146 |
/* |
|
147 |
* Determine the ENA to use for this event. If we are in a loading |
|
148 |
* state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use |
|
149 |
* a root zio-wide ENA. Otherwise, simply use a unique ENA. |
|
150 |
*/ |
|
151 |
if (spa->spa_load_state != SPA_LOAD_NONE) { |
|
152 |
if (spa->spa_ena == 0) |
|
153 |
spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1); |
|
154 |
ena = spa->spa_ena; |
|
155 |
} else if (zio != NULL && zio->io_logical != NULL) { |
|
156 |
if (zio->io_logical->io_ena == 0) |
|
157 |
zio->io_logical->io_ena = |
|
158 |
fm_ena_generate(0, FM_ENA_FMT1); |
|
159 |
ena = zio->io_logical->io_ena; |
|
160 |
} else { |
|
161 |
ena = fm_ena_generate(0, FM_ENA_FMT1); |
|
162 |
} |
|
163 |
||
164 |
/* |
|
165 |
* Construct the full class, detector, and other standard FMA fields. |
|
166 |
*/ |
|
167 |
(void) snprintf(class, sizeof (class), "%s.%s", |
|
168 |
ZFS_ERROR_CLASS, subclass); |
|
169 |
||
170 |
fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa), |
|
171 |
vd != NULL ? vd->vdev_guid : 0); |
|
172 |
||
173 |
fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); |
|
174 |
||
175 |
/* |
|
176 |
* Construct the per-ereport payload, depending on which parameters are |
|
177 |
* passed in. |
|
178 |
*/ |
|
179 |
||
180 |
/* |
|
181 |
* Generic payload members common to all ereports. |
|
182 |
* |
|
183 |
* The direct reference to spa_name is used rather than spa_name() |
|
184 |
* because of the asynchronous nature of the zio pipeline. spa_name() |
|
185 |
* asserts that the config lock is held in some form. This is always |
|
186 |
* the case in I/O context, but because the check for RW_WRITER compares |
|
187 |
* against 'curthread', we may be in an asynchronous context and blow |
|
188 |
* this assert. Rather than loosen this assert, we acknowledge that all |
|
189 |
* contexts in which this function is called (pool open, I/O) are safe, |
|
190 |
* and dereference the name directly. |
|
191 |
*/ |
|
192 |
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL, |
|
193 |
DATA_TYPE_STRING, spa->spa_name, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, |
|
194 |
DATA_TYPE_UINT64, spa_guid(spa), |
|
195 |
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32, |
|
196 |
spa->spa_load_state, NULL); |
|
197 |
||
198 |
if (vd != NULL) { |
|
199 |
vdev_t *pvd = vd->vdev_parent; |
|
200 |
||
201 |
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, |
|
202 |
DATA_TYPE_UINT64, vd->vdev_guid, |
|
203 |
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, |
|
204 |
DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL); |
|
205 |
if (vd->vdev_path) |
|
206 |
fm_payload_set(ereport, |
|
207 |
FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, |
|
208 |
DATA_TYPE_STRING, vd->vdev_path, NULL); |
|
209 |
if (vd->vdev_devid) |
|
210 |
fm_payload_set(ereport, |
|
211 |
FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, |
|
212 |
DATA_TYPE_STRING, vd->vdev_devid, NULL); |
|
213 |
||
214 |
if (pvd != NULL) { |
|
215 |
fm_payload_set(ereport, |
|
216 |
FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, |
|
217 |
DATA_TYPE_UINT64, pvd->vdev_guid, |
|
218 |
FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE, |
|
219 |
DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type, |
|
220 |
NULL); |
|
221 |
if (pvd->vdev_path) |
|
222 |
fm_payload_set(ereport, |
|
223 |
FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH, |
|
224 |
DATA_TYPE_STRING, vd->vdev_path, NULL); |
|
225 |
if (pvd->vdev_devid) |
|
226 |
fm_payload_set(ereport, |
|
227 |
FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, |
|
228 |
DATA_TYPE_STRING, pvd->vdev_devid, NULL); |
|
229 |
} |
|
230 |
} |
|
231 |
||
232 |
if (zio != NULL) { |
|
233 |
/* |
|
234 |
* Payload common to all I/Os. |
|
235 |
*/ |
|
236 |
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR, |
|
237 |
DATA_TYPE_INT32, zio->io_error, NULL); |
|
238 |
||
239 |
/* |
|
240 |
* If the 'size' parameter is non-zero, it indicates this is a |
|
241 |
* RAID-Z or other I/O where the physical offset and length are |
|
242 |
* provided for us, instead of within the zio_t. |
|
243 |
*/ |
|
244 |
if (vd != NULL) { |
|
245 |
if (size) |
|
246 |
fm_payload_set(ereport, |
|
247 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, |
|
248 |
DATA_TYPE_UINT64, stateoroffset, |
|
249 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, |
|
250 |
DATA_TYPE_UINT64, size); |
|
251 |
else |
|
252 |
fm_payload_set(ereport, |
|
253 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, |
|
254 |
DATA_TYPE_UINT64, zio->io_offset, |
|
255 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, |
|
256 |
DATA_TYPE_UINT64, zio->io_size); |
|
257 |
} |
|
258 |
||
259 |
/* |
|
260 |
* Payload for I/Os with corresponding logical information. |
|
261 |
*/ |
|
262 |
if (zio->io_logical != NULL) |
|
263 |
fm_payload_set(ereport, |
|
264 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, |
|
265 |
DATA_TYPE_UINT64, |
|
266 |
zio->io_logical->io_bookmark.zb_objset, |
|
267 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, |
|
268 |
DATA_TYPE_UINT64, |
|
269 |
zio->io_logical->io_bookmark.zb_object, |
|
270 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, |
|
271 |
DATA_TYPE_INT32, |
|
272 |
zio->io_logical->io_bookmark.zb_level, |
|
273 |
FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, |
|
274 |
DATA_TYPE_UINT64, |
|
275 |
zio->io_logical->io_bookmark.zb_blkid); |
|
276 |
} else if (vd != NULL) { |
|
277 |
/* |
|
278 |
* If we have a vdev but no zio, this is a device fault, and the |
|
279 |
* 'stateoroffset' parameter indicates the previous state of the |
|
280 |
* vdev. |
|
281 |
*/ |
|
282 |
fm_payload_set(ereport, |
|
283 |
FM_EREPORT_PAYLOAD_ZFS_PREV_STATE, |
|
284 |
DATA_TYPE_UINT64, stateoroffset, NULL); |
|
285 |
} |
|
286 |
mutex_exit(&spa->spa_errlist_lock); |
|
287 |
||
288 |
fm_ereport_post(ereport, EVCH_SLEEP); |
|
289 |
||
290 |
fm_nvlist_destroy(ereport, FM_NVA_FREE); |
|
291 |
fm_nvlist_destroy(detector, FM_NVA_FREE); |
|
292 |
#endif |
|
293 |
} |
|
294 |
||
295 |
/* |
|
296 |
* The 'resource.fs.zfs.ok' event is an internal signal that the associated |
|
297 |
* resource (pool or disk) has been identified by ZFS as healthy. This will |
|
298 |
* then trigger the DE to close the associated case, if any. |
|
299 |
*/ |
|
300 |
void |
|
301 |
zfs_post_ok(spa_t *spa, vdev_t *vd) |
|
302 |
{ |
|
303 |
#ifdef _KERNEL |
|
304 |
nvlist_t *resource; |
|
305 |
char class[64]; |
|
306 |
||
307 |
if ((resource = fm_nvlist_create(NULL)) == NULL) |
|
308 |
return; |
|
309 |
||
310 |
(void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, |
|
311 |
ZFS_ERROR_CLASS, FM_RESOURCE_OK); |
|
312 |
VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); |
|
313 |
VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); |
|
314 |
VERIFY(nvlist_add_uint64(resource, |
|
315 |
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); |
|
316 |
if (vd) |
|
317 |
VERIFY(nvlist_add_uint64(resource, |
|
318 |
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); |
|
319 |
||
320 |
fm_ereport_post(resource, EVCH_SLEEP); |
|
321 |
||
322 |
fm_nvlist_destroy(resource, FM_NVA_FREE); |
|
323 |
#endif |
|
324 |
} |