author | ek110237 |
Wed, 30 Apr 2008 12:37:56 -0700 | |
changeset 6523 | c1d2a7f04573 |
parent 6423 | 437422a29d3a |
child 6615 | 333cfc13ec55 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1489
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
6423 | 22 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/zfs_context.h> |
|
29 |
#include <sys/spa.h> |
|
6423 | 30 |
#include <sys/refcount.h> |
789 | 31 |
#include <sys/vdev_disk.h> |
32 |
#include <sys/vdev_impl.h> |
|
33 |
#include <sys/fs/zfs.h> |
|
34 |
#include <sys/zio.h> |
|
1171 | 35 |
#include <sys/sunldi.h> |
789 | 36 |
|
37 |
/* |
|
38 |
* Virtual device vector for disks. |
|
39 |
*/ |
|
40 |
||
41 |
extern ldi_ident_t zfs_li; |
|
42 |
||
43 |
typedef struct vdev_disk_buf { |
|
44 |
buf_t vdb_buf; |
|
45 |
zio_t *vdb_io; |
|
46 |
} vdev_disk_buf_t; |
|
47 |
||
48 |
static int |
|
5329 | 49 |
vdev_disk_open_common(vdev_t *vd) |
789 | 50 |
{ |
51 |
vdev_disk_t *dvd; |
|
5329 | 52 |
dev_t dev; |
789 | 53 |
int error; |
54 |
||
55 |
/* |
|
56 |
* We must have a pathname, and it must be absolute. |
|
57 |
*/ |
|
58 |
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { |
|
59 |
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; |
|
60 |
return (EINVAL); |
|
61 |
} |
|
62 |
||
63 |
dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); |
|
64 |
||
65 |
/* |
|
66 |
* When opening a disk device, we want to preserve the user's original |
|
67 |
* intent. We always want to open the device by the path the user gave |
|
68 |
* us, even if it is one of multiple paths to the save device. But we |
|
69 |
* also want to be able to survive disks being removed/recabled. |
|
70 |
* Therefore the sequence of opening devices is: |
|
71 |
* |
|
1171 | 72 |
* 1. Try opening the device by path. For legacy pools without the |
73 |
* 'whole_disk' property, attempt to fix the path by appending 's0'. |
|
789 | 74 |
* |
75 |
* 2. If the devid of the device matches the stored value, return |
|
76 |
* success. |
|
77 |
* |
|
78 |
* 3. Otherwise, the device may have moved. Try opening the device |
|
79 |
* by the devid instead. |
|
80 |
* |
|
81 |
*/ |
|
82 |
if (vd->vdev_devid != NULL) { |
|
83 |
if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, |
|
84 |
&dvd->vd_minor) != 0) { |
|
85 |
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; |
|
86 |
return (EINVAL); |
|
87 |
} |
|
88 |
} |
|
89 |
||
90 |
error = EINVAL; /* presume failure */ |
|
91 |
||
92 |
if (vd->vdev_path != NULL) { |
|
93 |
ddi_devid_t devid; |
|
94 |
||
1171 | 95 |
if (vd->vdev_wholedisk == -1ULL) { |
96 |
size_t len = strlen(vd->vdev_path) + 3; |
|
97 |
char *buf = kmem_alloc(len, KM_SLEEP); |
|
98 |
ldi_handle_t lh; |
|
99 |
||
100 |
(void) snprintf(buf, len, "%ss0", vd->vdev_path); |
|
789 | 101 |
|
1171 | 102 |
if (ldi_open_by_name(buf, spa_mode, kcred, |
103 |
&lh, zfs_li) == 0) { |
|
104 |
spa_strfree(vd->vdev_path); |
|
105 |
vd->vdev_path = buf; |
|
106 |
vd->vdev_wholedisk = 1ULL; |
|
107 |
(void) ldi_close(lh, spa_mode, kcred); |
|
108 |
} else { |
|
109 |
kmem_free(buf, len); |
|
110 |
} |
|
111 |
} |
|
789 | 112 |
|
1171 | 113 |
error = ldi_open_by_name(vd->vdev_path, spa_mode, kcred, |
114 |
&dvd->vd_lh, zfs_li); |
|
789 | 115 |
|
116 |
/* |
|
117 |
* Compare the devid to the stored value. |
|
118 |
*/ |
|
119 |
if (error == 0 && vd->vdev_devid != NULL && |
|
120 |
ldi_get_devid(dvd->vd_lh, &devid) == 0) { |
|
121 |
if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { |
|
122 |
error = EINVAL; |
|
123 |
(void) ldi_close(dvd->vd_lh, spa_mode, kcred); |
|
124 |
dvd->vd_lh = NULL; |
|
125 |
} |
|
126 |
ddi_devid_free(devid); |
|
127 |
} |
|
1171 | 128 |
|
129 |
/* |
|
130 |
* If we succeeded in opening the device, but 'vdev_wholedisk' |
|
131 |
* is not yet set, then this must be a slice. |
|
132 |
*/ |
|
133 |
if (error == 0 && vd->vdev_wholedisk == -1ULL) |
|
134 |
vd->vdev_wholedisk = 0; |
|
789 | 135 |
} |
136 |
||
137 |
/* |
|
138 |
* If we were unable to open by path, or the devid check fails, open by |
|
139 |
* devid instead. |
|
140 |
*/ |
|
141 |
if (error != 0 && vd->vdev_devid != NULL) |
|
142 |
error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, |
|
143 |
spa_mode, kcred, &dvd->vd_lh, zfs_li); |
|
144 |
||
4451 | 145 |
/* |
146 |
* If all else fails, then try opening by physical path (if available) |
|
147 |
* or the logical path (if we failed due to the devid check). While not |
|
148 |
* as reliable as the devid, this will give us something, and the higher |
|
149 |
* level vdev validation will prevent us from opening the wrong device. |
|
150 |
*/ |
|
151 |
if (error) { |
|
152 |
if (vd->vdev_physpath != NULL && |
|
153 |
(dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != ENODEV) |
|
154 |
error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode, |
|
155 |
kcred, &dvd->vd_lh, zfs_li); |
|
156 |
||
157 |
/* |
|
158 |
* Note that we don't support the legacy auto-wholedisk support |
|
159 |
* as above. This hasn't been used in a very long time and we |
|
160 |
* don't need to propagate its oddities to this edge condition. |
|
161 |
*/ |
|
162 |
if (error && vd->vdev_path != NULL) |
|
163 |
error = ldi_open_by_name(vd->vdev_path, spa_mode, kcred, |
|
164 |
&dvd->vd_lh, zfs_li); |
|
165 |
} |
|
166 |
||
5329 | 167 |
if (error) |
789 | 168 |
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; |
5329 | 169 |
|
170 |
return (error); |
|
171 |
} |
|
172 |
||
173 |
static int |
|
174 |
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) |
|
175 |
{ |
|
176 |
vdev_disk_t *dvd; |
|
177 |
struct dk_minfo dkm; |
|
178 |
int error; |
|
179 |
dev_t dev; |
|
180 |
int otyp; |
|
181 |
||
182 |
error = vdev_disk_open_common(vd); |
|
183 |
if (error) |
|
789 | 184 |
return (error); |
185 |
||
5329 | 186 |
dvd = vd->vdev_tsd; |
789 | 187 |
/* |
4451 | 188 |
* Once a device is opened, verify that the physical device path (if |
189 |
* available) is up to date. |
|
190 |
*/ |
|
191 |
if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && |
|
192 |
ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { |
|
5329 | 193 |
char *physpath, *minorname; |
194 |
||
4451 | 195 |
physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
196 |
minorname = NULL; |
|
197 |
if (ddi_dev_pathname(dev, otyp, physpath) == 0 && |
|
198 |
ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && |
|
199 |
(vd->vdev_physpath == NULL || |
|
200 |
strcmp(vd->vdev_physpath, physpath) != 0)) { |
|
201 |
if (vd->vdev_physpath) |
|
202 |
spa_strfree(vd->vdev_physpath); |
|
203 |
(void) strlcat(physpath, ":", MAXPATHLEN); |
|
204 |
(void) strlcat(physpath, minorname, MAXPATHLEN); |
|
205 |
vd->vdev_physpath = spa_strdup(physpath); |
|
206 |
} |
|
207 |
if (minorname) |
|
208 |
kmem_free(minorname, strlen(minorname) + 1); |
|
209 |
kmem_free(physpath, MAXPATHLEN); |
|
210 |
} |
|
211 |
||
212 |
/* |
|
789 | 213 |
* Determine the actual size of the device. |
214 |
*/ |
|
215 |
if (ldi_get_size(dvd->vd_lh, psize) != 0) { |
|
216 |
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; |
|
217 |
return (EINVAL); |
|
218 |
} |
|
219 |
||
1732 | 220 |
/* |
221 |
* If we own the whole disk, try to enable disk write caching. |
|
222 |
* We ignore errors because it's OK if we can't do it. |
|
223 |
*/ |
|
1489
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
224 |
if (vd->vdev_wholedisk == 1) { |
1732 | 225 |
int wce = 1; |
226 |
(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, |
|
227 |
FKIOCTL, kcred, NULL); |
|
228 |
} |
|
1489
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
229 |
|
1732 | 230 |
/* |
231 |
* Determine the device's minimum transfer size. |
|
232 |
* If the ioctl isn't supported, assume DEV_BSIZE. |
|
233 |
*/ |
|
234 |
if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm, |
|
235 |
FKIOCTL, kcred, NULL) != 0) |
|
236 |
dkm.dki_lbsize = DEV_BSIZE; |
|
1489
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
237 |
|
1732 | 238 |
*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1; |
1489
fa842259660e
6322205 Enable disk write cache if ZFS owns the disk
webaker
parents:
1171
diff
changeset
|
239 |
|
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
240 |
/* |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
241 |
* Clear the nowritecache bit, so that on a vdev_reopen() we will |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
242 |
* try again. |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
243 |
*/ |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
244 |
vd->vdev_nowritecache = B_FALSE; |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
245 |
|
789 | 246 |
return (0); |
247 |
} |
|
248 |
||
249 |
static void |
|
250 |
vdev_disk_close(vdev_t *vd) |
|
251 |
{ |
|
252 |
vdev_disk_t *dvd = vd->vdev_tsd; |
|
253 |
||
254 |
if (dvd == NULL) |
|
255 |
return; |
|
256 |
||
257 |
if (dvd->vd_minor != NULL) |
|
258 |
ddi_devid_str_free(dvd->vd_minor); |
|
259 |
||
260 |
if (dvd->vd_devid != NULL) |
|
261 |
ddi_devid_free(dvd->vd_devid); |
|
262 |
||
263 |
if (dvd->vd_lh != NULL) |
|
264 |
(void) ldi_close(dvd->vd_lh, spa_mode, kcred); |
|
265 |
||
266 |
kmem_free(dvd, sizeof (vdev_disk_t)); |
|
267 |
vd->vdev_tsd = NULL; |
|
268 |
} |
|
269 |
||
6423 | 270 |
int |
271 |
vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, |
|
272 |
uint64_t offset, int flags) |
|
273 |
{ |
|
274 |
buf_t *bp; |
|
275 |
int error = 0; |
|
276 |
||
277 |
if (vd_lh == NULL) |
|
278 |
return (EINVAL); |
|
279 |
||
280 |
ASSERT(flags & B_READ || flags & B_WRITE); |
|
281 |
||
282 |
bp = getrbuf(KM_SLEEP); |
|
283 |
bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST; |
|
284 |
bp->b_bcount = size; |
|
285 |
bp->b_un.b_addr = (void *)data; |
|
286 |
bp->b_lblkno = lbtodb(offset); |
|
287 |
bp->b_bufsize = size; |
|
288 |
||
289 |
error = ldi_strategy(vd_lh, bp); |
|
290 |
ASSERT(error == 0); |
|
291 |
if ((error = biowait(bp)) == 0 && bp->b_resid != 0) |
|
292 |
error = EIO; |
|
293 |
freerbuf(bp); |
|
294 |
||
295 |
return (error); |
|
296 |
} |
|
297 |
||
5329 | 298 |
static int |
299 |
vdev_disk_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, |
|
300 |
int flags) |
|
301 |
{ |
|
302 |
int error = 0; |
|
6523
c1d2a7f04573
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
ek110237
parents:
6423
diff
changeset
|
303 |
vdev_disk_t *dvd = vd ? vd->vdev_tsd : NULL; |
5329 | 304 |
|
305 |
if (vd == NULL || dvd == NULL || dvd->vd_lh == NULL) |
|
306 |
return (EINVAL); |
|
307 |
||
6423 | 308 |
error = vdev_disk_physio(dvd->vd_lh, data, size, offset, flags); |
5329 | 309 |
|
310 |
if (zio_injection_enabled && error == 0) |
|
311 |
error = zio_handle_device_injection(vd, EIO); |
|
312 |
||
313 |
return (error); |
|
314 |
} |
|
315 |
||
5369
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
316 |
/* |
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
317 |
* Determine if the underlying device is accessible by reading and writing |
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
318 |
* to a known location. We must be able to do this during syncing context |
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
319 |
* and thus we cannot set the vdev state directly. |
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
320 |
*/ |
5329 | 321 |
static int |
322 |
vdev_disk_probe(vdev_t *vd) |
|
323 |
{ |
|
324 |
uint64_t offset; |
|
325 |
vdev_t *nvd; |
|
326 |
int l, error = 0, retries = 0; |
|
327 |
char *vl_pad; |
|
328 |
||
329 |
if (vd == NULL) |
|
330 |
return (EINVAL); |
|
331 |
||
332 |
/* Hijack the current vdev */ |
|
333 |
nvd = vd; |
|
334 |
||
335 |
/* |
|
336 |
* Pick a random label to rewrite. |
|
337 |
*/ |
|
338 |
l = spa_get_random(VDEV_LABELS); |
|
339 |
ASSERT(l < VDEV_LABELS); |
|
340 |
||
341 |
offset = vdev_label_offset(vd->vdev_psize, l, |
|
342 |
offsetof(vdev_label_t, vl_pad)); |
|
343 |
||
344 |
vl_pad = kmem_alloc(VDEV_SKIP_SIZE, KM_SLEEP); |
|
345 |
||
346 |
/* |
|
347 |
* Try to read and write to a special location on the |
|
348 |
* label. We use the existing vdev initially and only |
|
349 |
* try to create and reopen it if we encounter a failure. |
|
350 |
*/ |
|
351 |
while ((error = vdev_disk_probe_io(nvd, vl_pad, VDEV_SKIP_SIZE, |
|
352 |
offset, B_READ)) != 0 && retries == 0) { |
|
353 |
||
354 |
nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); |
|
355 |
if (vd->vdev_path) |
|
356 |
nvd->vdev_path = spa_strdup(vd->vdev_path); |
|
357 |
if (vd->vdev_physpath) |
|
358 |
nvd->vdev_physpath = spa_strdup(vd->vdev_physpath); |
|
359 |
if (vd->vdev_devid) |
|
360 |
nvd->vdev_devid = spa_strdup(vd->vdev_devid); |
|
361 |
nvd->vdev_wholedisk = vd->vdev_wholedisk; |
|
362 |
nvd->vdev_guid = vd->vdev_guid; |
|
363 |
retries++; |
|
364 |
||
365 |
error = vdev_disk_open_common(nvd); |
|
5369
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
366 |
if (error) |
5329 | 367 |
break; |
368 |
} |
|
369 |
||
370 |
if (!error) { |
|
371 |
error = vdev_disk_probe_io(nvd, vl_pad, VDEV_SKIP_SIZE, |
|
372 |
offset, B_WRITE); |
|
373 |
} |
|
374 |
||
375 |
/* Clean up if we allocated a new vdev */ |
|
376 |
if (retries) { |
|
377 |
vdev_disk_close(nvd); |
|
378 |
if (nvd->vdev_path) |
|
379 |
spa_strfree(nvd->vdev_path); |
|
380 |
if (nvd->vdev_physpath) |
|
381 |
spa_strfree(nvd->vdev_physpath); |
|
382 |
if (nvd->vdev_devid) |
|
383 |
spa_strfree(nvd->vdev_devid); |
|
384 |
kmem_free(nvd, sizeof (vdev_t)); |
|
385 |
} |
|
386 |
kmem_free(vl_pad, VDEV_SKIP_SIZE); |
|
387 |
||
388 |
/* Reset the failing flag */ |
|
389 |
if (!error) |
|
390 |
vd->vdev_is_failing = B_FALSE; |
|
391 |
||
392 |
return (error); |
|
393 |
} |
|
394 |
||
789 | 395 |
static void |
396 |
vdev_disk_io_intr(buf_t *bp) |
|
397 |
{ |
|
398 |
vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; |
|
399 |
zio_t *zio = vdb->vdb_io; |
|
400 |
||
401 |
if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0) |
|
402 |
zio->io_error = EIO; |
|
403 |
||
404 |
kmem_free(vdb, sizeof (vdev_disk_buf_t)); |
|
405 |
||
5530 | 406 |
zio_interrupt(zio); |
789 | 407 |
} |
408 |
||
409 |
static void |
|
410 |
vdev_disk_ioctl_done(void *zio_arg, int error) |
|
411 |
{ |
|
412 |
zio_t *zio = zio_arg; |
|
413 |
||
414 |
zio->io_error = error; |
|
415 |
||
5530 | 416 |
zio_interrupt(zio); |
789 | 417 |
} |
418 |
||
5530 | 419 |
static int |
789 | 420 |
vdev_disk_io_start(zio_t *zio) |
421 |
{ |
|
422 |
vdev_t *vd = zio->io_vd; |
|
423 |
vdev_disk_t *dvd = vd->vdev_tsd; |
|
424 |
vdev_disk_buf_t *vdb; |
|
425 |
buf_t *bp; |
|
426 |
int flags, error; |
|
427 |
||
428 |
if (zio->io_type == ZIO_TYPE_IOCTL) { |
|
429 |
zio_vdev_io_bypass(zio); |
|
430 |
||
431 |
/* XXPOLICY */ |
|
5329 | 432 |
if (!vdev_readable(vd)) { |
789 | 433 |
zio->io_error = ENXIO; |
5530 | 434 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 435 |
} |
436 |
||
437 |
switch (zio->io_cmd) { |
|
438 |
||
439 |
case DKIOCFLUSHWRITECACHE: |
|
440 |
||
2885 | 441 |
if (zfs_nocacheflush) |
442 |
break; |
|
443 |
||
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
444 |
if (vd->vdev_nowritecache) { |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
445 |
zio->io_error = ENOTSUP; |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
446 |
break; |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
447 |
} |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
448 |
|
789 | 449 |
zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done; |
5065
fcf530c3356e
PSARC 2007/053 Per-Disk-Device support of non-volatile cache
gz161490
parents:
4455
diff
changeset
|
450 |
zio->io_dk_callback.dkc_flag = FLUSH_VOLATILE; |
789 | 451 |
zio->io_dk_callback.dkc_cookie = zio; |
452 |
||
453 |
error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, |
|
454 |
(uintptr_t)&zio->io_dk_callback, |
|
455 |
FKIOCTL, kcred, NULL); |
|
456 |
||
457 |
if (error == 0) { |
|
458 |
/* |
|
459 |
* The ioctl will be done asychronously, |
|
460 |
* and will call vdev_disk_ioctl_done() |
|
461 |
* upon completion. |
|
462 |
*/ |
|
5530 | 463 |
return (ZIO_PIPELINE_STOP); |
464 |
} |
|
465 |
||
466 |
if (error == ENOTSUP || error == ENOTTY) { |
|
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
467 |
/* |
4455
122ee9c0f54c
6456223 system hang after test suite run on usb disk
mishra
parents:
4451
diff
changeset
|
468 |
* If we get ENOTSUP or ENOTTY, we know that |
122ee9c0f54c
6456223 system hang after test suite run on usb disk
mishra
parents:
4451
diff
changeset
|
469 |
* no future attempts will ever succeed. |
122ee9c0f54c
6456223 system hang after test suite run on usb disk
mishra
parents:
4451
diff
changeset
|
470 |
* In this case we set a persistent bit so |
122ee9c0f54c
6456223 system hang after test suite run on usb disk
mishra
parents:
4451
diff
changeset
|
471 |
* that we don't bother with the ioctl in the |
122ee9c0f54c
6456223 system hang after test suite run on usb disk
mishra
parents:
4451
diff
changeset
|
472 |
* future. |
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
473 |
*/ |
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
474 |
vd->vdev_nowritecache = B_TRUE; |
789 | 475 |
} |
476 |
zio->io_error = error; |
|
1773
d6e4f2855c14
6407791 bringover into ZFS results in s. files newer than extracted source
eschrock
parents:
1732
diff
changeset
|
477 |
|
789 | 478 |
break; |
479 |
||
480 |
default: |
|
481 |
zio->io_error = ENOTSUP; |
|
482 |
} |
|
483 |
||
5530 | 484 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 485 |
} |
486 |
||
487 |
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) |
|
5530 | 488 |
return (ZIO_PIPELINE_STOP); |
789 | 489 |
|
490 |
if ((zio = vdev_queue_io(zio)) == NULL) |
|
5530 | 491 |
return (ZIO_PIPELINE_STOP); |
492 |
||
493 |
if (zio->io_type == ZIO_TYPE_WRITE) |
|
494 |
error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO; |
|
495 |
else |
|
496 |
error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO; |
|
497 |
error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error; |
|
498 |
||
499 |
if (error) { |
|
500 |
zio->io_error = error; |
|
501 |
zio_interrupt(zio); |
|
502 |
return (ZIO_PIPELINE_STOP); |
|
503 |
} |
|
789 | 504 |
|
505 |
flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); |
|
506 |
flags |= B_BUSY | B_NOCACHE; |
|
507 |
if (zio->io_flags & ZIO_FLAG_FAILFAST) |
|
508 |
flags |= B_FAILFAST; |
|
509 |
||
510 |
vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); |
|
511 |
||
512 |
vdb->vdb_io = zio; |
|
513 |
bp = &vdb->vdb_buf; |
|
514 |
||
515 |
bioinit(bp); |
|
516 |
bp->b_flags = flags; |
|
517 |
bp->b_bcount = zio->io_size; |
|
518 |
bp->b_un.b_addr = zio->io_data; |
|
519 |
bp->b_lblkno = lbtodb(zio->io_offset); |
|
520 |
bp->b_bufsize = zio->io_size; |
|
521 |
bp->b_iodone = (int (*)())vdev_disk_io_intr; |
|
522 |
||
523 |
error = ldi_strategy(dvd->vd_lh, bp); |
|
524 |
/* ldi_strategy() will return non-zero only on programming errors */ |
|
525 |
ASSERT(error == 0); |
|
5530 | 526 |
|
527 |
return (ZIO_PIPELINE_STOP); |
|
789 | 528 |
} |
529 |
||
5530 | 530 |
static int |
789 | 531 |
vdev_disk_io_done(zio_t *zio) |
532 |
{ |
|
533 |
vdev_queue_io_done(zio); |
|
534 |
||
535 |
if (zio->io_type == ZIO_TYPE_WRITE) |
|
536 |
vdev_cache_write(zio); |
|
537 |
||
1544 | 538 |
if (zio_injection_enabled && zio->io_error == 0) |
539 |
zio->io_error = zio_handle_device_injection(zio->io_vd, EIO); |
|
540 |
||
4451 | 541 |
/* |
542 |
* If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if |
|
543 |
* the device has been removed. If this is the case, then we trigger an |
|
5329 | 544 |
* asynchronous removal of the device. Otherwise, probe the device and |
5369
27c1235ef9a4
6621355 panic in vdev_disk_io_start when trying to write to a faulted device
gw25295
parents:
5329
diff
changeset
|
545 |
* make sure it's still accessible. |
4451 | 546 |
*/ |
547 |
if (zio->io_error == EIO) { |
|
5329 | 548 |
vdev_t *vd = zio->io_vd; |
549 |
vdev_disk_t *dvd = vd->vdev_tsd; |
|
550 |
int state; |
|
551 |
||
4451 | 552 |
state = DKIO_NONE; |
5329 | 553 |
if (dvd && ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state, |
4451 | 554 |
FKIOCTL, kcred, NULL) == 0 && |
555 |
state != DKIO_INSERTED) { |
|
556 |
vd->vdev_remove_wanted = B_TRUE; |
|
557 |
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); |
|
5329 | 558 |
} else if (vdev_probe(vd) != 0) { |
559 |
ASSERT(vd->vdev_ops->vdev_op_leaf); |
|
560 |
vd->vdev_is_failing = B_TRUE; |
|
4451 | 561 |
} |
562 |
} |
|
563 |
||
5530 | 564 |
return (ZIO_PIPELINE_CONTINUE); |
789 | 565 |
} |
566 |
||
567 |
vdev_ops_t vdev_disk_ops = { |
|
568 |
vdev_disk_open, |
|
569 |
vdev_disk_close, |
|
5329 | 570 |
vdev_disk_probe, |
789 | 571 |
vdev_default_asize, |
572 |
vdev_disk_io_start, |
|
573 |
vdev_disk_io_done, |
|
574 |
NULL, |
|
575 |
VDEV_TYPE_DISK, /* name of this vdev type */ |
|
576 |
B_TRUE /* leaf vdev */ |
|
577 |
}; |
|
6423 | 578 |
|
579 |
/* |
|
580 |
* Given the root disk device pathname, read the label from the device, |
|
581 |
* and construct a configuration nvlist. |
|
582 |
*/ |
|
583 |
nvlist_t * |
|
584 |
vdev_disk_read_rootlabel(char *devpath) |
|
585 |
{ |
|
586 |
nvlist_t *config = NULL; |
|
587 |
ldi_handle_t vd_lh; |
|
588 |
vdev_label_t *label; |
|
589 |
uint64_t s, size; |
|
590 |
int l; |
|
591 |
||
592 |
/* |
|
593 |
* Read the device label and build the nvlist. |
|
594 |
*/ |
|
595 |
if (ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, zfs_li)) |
|
596 |
return (NULL); |
|
597 |
||
598 |
if (ldi_get_size(vd_lh, &s)) |
|
599 |
return (NULL); |
|
600 |
||
601 |
size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); |
|
602 |
label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); |
|
603 |
||
604 |
for (l = 0; l < VDEV_LABELS; l++) { |
|
605 |
uint64_t offset, state, txg = 0; |
|
606 |
||
607 |
/* read vdev label */ |
|
608 |
offset = vdev_label_offset(size, l, 0); |
|
609 |
if (vdev_disk_physio(vd_lh, (caddr_t)label, |
|
610 |
VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE + |
|
611 |
VDEV_PHYS_SIZE, offset, B_READ) != 0) |
|
612 |
continue; |
|
613 |
||
614 |
if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, |
|
615 |
sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) { |
|
616 |
config = NULL; |
|
617 |
continue; |
|
618 |
} |
|
619 |
||
620 |
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, |
|
621 |
&state) != 0 || state >= POOL_STATE_DESTROYED) { |
|
622 |
nvlist_free(config); |
|
623 |
config = NULL; |
|
624 |
continue; |
|
625 |
} |
|
626 |
||
627 |
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
|
628 |
&txg) != 0 || txg == 0) { |
|
629 |
nvlist_free(config); |
|
630 |
config = NULL; |
|
631 |
continue; |
|
632 |
} |
|
633 |
||
634 |
break; |
|
635 |
} |
|
636 |
||
637 |
kmem_free(label, sizeof (vdev_label_t)); |
|
638 |
return (config); |
|
639 |
} |