author | maybee |
Mon, 19 Jun 2006 19:31:35 -0700 | |
changeset 2237 | 45affe88ed99 |
parent 2113 | 0510bb40c993 |
child 2638 | 4f583dfeae92 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1356
e021b5e4aa0e
6377671 zfs mount -a shouldn't bother checking snapshots
eschrock
parents:
1175
diff
changeset
|
22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
/* |
|
29 |
* ZFS volume emulation driver. |
|
30 |
* |
|
31 |
* Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. |
|
32 |
* Volumes are accessed through the symbolic links named: |
|
33 |
* |
|
34 |
* /dev/zvol/dsk/<pool_name>/<dataset_name> |
|
35 |
* /dev/zvol/rdsk/<pool_name>/<dataset_name> |
|
36 |
* |
|
37 |
* These links are created by the ZFS-specific devfsadm link generator. |
|
38 |
* Volumes are persistent through reboot. No user command needs to be |
|
39 |
* run before opening and using a device. |
|
40 |
*/ |
|
41 |
||
42 |
#include <sys/types.h> |
|
43 |
#include <sys/param.h> |
|
44 |
#include <sys/errno.h> |
|
45 |
#include <sys/aio_req.h> |
|
46 |
#include <sys/uio.h> |
|
47 |
#include <sys/buf.h> |
|
48 |
#include <sys/modctl.h> |
|
49 |
#include <sys/open.h> |
|
50 |
#include <sys/kmem.h> |
|
51 |
#include <sys/conf.h> |
|
52 |
#include <sys/cmn_err.h> |
|
53 |
#include <sys/stat.h> |
|
54 |
#include <sys/zap.h> |
|
55 |
#include <sys/spa.h> |
|
56 |
#include <sys/zio.h> |
|
57 |
#include <sys/dsl_prop.h> |
|
58 |
#include <sys/dkio.h> |
|
59 |
#include <sys/efi_partition.h> |
|
60 |
#include <sys/byteorder.h> |
|
61 |
#include <sys/pathname.h> |
|
62 |
#include <sys/ddi.h> |
|
63 |
#include <sys/sunddi.h> |
|
64 |
#include <sys/crc32.h> |
|
65 |
#include <sys/dirent.h> |
|
66 |
#include <sys/policy.h> |
|
67 |
#include <sys/fs/zfs.h> |
|
68 |
#include <sys/zfs_ioctl.h> |
|
69 |
#include <sys/mkdev.h> |
|
1141 | 70 |
#include <sys/zil.h> |
2237 | 71 |
#include <sys/refcount.h> |
789 | 72 |
|
73 |
#include "zfs_namecheck.h" |
|
74 |
||
75 |
/* Object number of the DMU object that holds the volume's data. */
#define	ZVOL_OBJ		1ULL
/* Object number of the ZAP object that holds the volume's "size" entry. */
#define	ZVOL_ZAP_OBJ		2ULL

/* Soft-state handle; per-volume state is looked up by minor number. */
static void *zvol_state;

/*
 * This lock protects the zvol_state structure from being modified
 * while it's being used, e.g. an open that comes in before a create
 * finishes.  It also protects temporary opens of the dataset so that,
 * e.g., an open doesn't get a spurious EBUSY.
 */
static kmutex_t zvol_state_lock;
/* Count of minors that currently exist; adjusted under zvol_state_lock. */
static uint32_t zvol_minors;
|
88 |
||
89 |
/*
 * The in-core state of each volume.
 *
 * One instance lives in the zvol_state soft-state table per minor
 * number.  It is filled in by zvol_create_minor() and torn down by
 * zvol_remove_minor().
 */
typedef struct zvol_state {
	char		zv_name[MAXPATHLEN]; /* pool/dd name */
	uint64_t	zv_volsize;	/* amount of space we advertise */
	minor_t		zv_minor;	/* minor number */
	uint8_t		zv_min_bs;	/* minimum addressable block shift */
	uint8_t		zv_readonly;	/* hard readonly; like write-protect */
	objset_t	*zv_objset;	/* objset handle */
	uint32_t	zv_mode;	/* DS_MODE_* flags at open time */
	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
	uint32_t	zv_total_opens;	/* total open count */
	zilog_t		*zv_zilog;	/* ZIL handle */
	uint64_t	zv_txg_assign;	/* txg to assign during ZIL replay */
	krwlock_t	zv_dslock;	/* dmu_sync() rwlock */
} zvol_state_t;
106 |
||
107 |
/*
 * Publish the volume's current size as device properties.
 *
 * Only the major number of 'dev' is used; the minor always comes from
 * zv->zv_minor.  "Size" is set to zv_volsize and "Nblocks" to
 * lbtodb(zv_volsize).  Either property update failing trips VERIFY().
 */
static void
zvol_size_changed(zvol_state_t *zv, dev_t dev)
{
	/* Rebuild the dev_t so it names this volume's minor. */
	dev = makedevice(getmajor(dev), zv->zv_minor);

	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
	    "Size", zv->zv_volsize) == DDI_SUCCESS);
	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
	    "Nblocks", lbtodb(zv->zv_volsize)) == DDI_SUCCESS);
}
|
117 |
||
118 |
int |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
119 |
zvol_check_volsize(zfs_cmd_t *zc, uint64_t blocksize) |
789 | 120 |
{ |
121 |
if (zc->zc_volsize == 0) |
|
122 |
return (EINVAL); |
|
123 |
||
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
124 |
if (zc->zc_volsize % blocksize != 0) |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
125 |
return (EINVAL); |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
126 |
|
789 | 127 |
#ifdef _ILP32 |
128 |
if (zc->zc_volsize - 1 > SPEC_MAXOFFSET_T) |
|
129 |
return (EOVERFLOW); |
|
130 |
#endif |
|
131 |
return (0); |
|
132 |
} |
|
133 |
||
134 |
int |
|
135 |
zvol_check_volblocksize(zfs_cmd_t *zc) |
|
136 |
{ |
|
137 |
if (zc->zc_volblocksize < SPA_MINBLOCKSIZE || |
|
138 |
zc->zc_volblocksize > SPA_MAXBLOCKSIZE || |
|
139 |
!ISP2(zc->zc_volblocksize)) |
|
140 |
return (EDOM); |
|
141 |
||
142 |
return (0); |
|
143 |
} |
|
144 |
||
145 |
static void |
|
146 |
zvol_readonly_changed_cb(void *arg, uint64_t newval) |
|
147 |
{ |
|
148 |
zvol_state_t *zv = arg; |
|
149 |
||
150 |
zv->zv_readonly = (uint8_t)newval; |
|
151 |
} |
|
152 |
||
153 |
int |
|
154 |
zvol_get_stats(zfs_cmd_t *zc, objset_t *os) |
|
155 |
{ |
|
156 |
int error; |
|
157 |
dmu_object_info_t doi; |
|
158 |
||
159 |
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &zc->zc_volsize); |
|
160 |
||
161 |
if (error) |
|
162 |
return (error); |
|
163 |
||
164 |
error = dmu_object_info(os, ZVOL_OBJ, &doi); |
|
165 |
||
166 |
if (error == 0) |
|
167 |
zc->zc_volblocksize = doi.doi_data_block_size; |
|
168 |
||
169 |
return (error); |
|
170 |
} |
|
171 |
||
172 |
/* |
|
173 |
* Find a free minor number. |
|
174 |
*/ |
|
175 |
static minor_t |
|
176 |
zvol_minor_alloc(void) |
|
177 |
{ |
|
178 |
minor_t minor; |
|
179 |
||
180 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
181 |
||
182 |
for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) |
|
183 |
if (ddi_get_soft_state(zvol_state, minor) == NULL) |
|
184 |
return (minor); |
|
185 |
||
186 |
return (0); |
|
187 |
} |
|
188 |
||
189 |
static zvol_state_t * |
|
190 |
zvol_minor_lookup(char *name) |
|
191 |
{ |
|
192 |
minor_t minor; |
|
193 |
zvol_state_t *zv; |
|
194 |
||
195 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
196 |
||
197 |
for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { |
|
198 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
199 |
if (zv == NULL) |
|
200 |
continue; |
|
201 |
if (strcmp(zv->zv_name, name) == 0) |
|
202 |
break; |
|
203 |
} |
|
204 |
||
205 |
return (zv); |
|
206 |
} |
|
207 |
||
208 |
/*
 * Creation callback for a new volume's objset; 'arg' is the zfs_cmd_t
 * carrying the requested zc_volblocksize and zc_volsize.
 *
 * Claims the fixed object numbers ZVOL_OBJ (data) and ZVOL_ZAP_OBJ
 * (properties) and records the volume size under the "size" ZAP key,
 * all within 'tx'.  Failures are only checked through ASSERT().
 */
void
zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
{
	zfs_cmd_t *zc = arg;
	int error;

	/* Data object, using the requested volume block size. */
	error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, zc->zc_volblocksize,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/* ZAP object that holds the volume's properties. */
	error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/* One 8-byte integer: the advertised volume size. */
	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &zc->zc_volsize, tx);
	ASSERT(error == 0);
}
|
225 |
||
226 |
/* |
|
1141 | 227 |
* Replay a TX_WRITE ZIL transaction that didn't get committed |
228 |
* after a system failure |
|
229 |
*/ |
|
230 |
static int |
|
231 |
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) |
|
232 |
{ |
|
233 |
objset_t *os = zv->zv_objset; |
|
234 |
char *data = (char *)(lr + 1); /* data follows lr_write_t */ |
|
235 |
uint64_t off = lr->lr_offset; |
|
236 |
uint64_t len = lr->lr_length; |
|
237 |
dmu_tx_t *tx; |
|
238 |
int error; |
|
239 |
||
240 |
if (byteswap) |
|
241 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
242 |
||
243 |
tx = dmu_tx_create(os); |
|
244 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); |
|
245 |
error = dmu_tx_assign(tx, zv->zv_txg_assign); |
|
246 |
if (error) { |
|
247 |
dmu_tx_abort(tx); |
|
248 |
} else { |
|
249 |
dmu_write(os, ZVOL_OBJ, off, len, data, tx); |
|
250 |
dmu_tx_commit(tx); |
|
251 |
} |
|
252 |
||
253 |
return (error); |
|
254 |
} |
|
255 |
||
256 |
/*
 * Replay handler installed for every record type other than TX_WRITE;
 * it ignores its arguments and always fails with ENOTSUP.
 */
/* ARGSUSED */
static int
zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
{
	return (ENOTSUP);
}
|
262 |
||
263 |
/*
 * Callback vectors for replaying records.
 * Only TX_WRITE is needed for zvol.
 *
 * Entries are positional: each slot's index is the transaction type
 * named in the comment beside it.  The table is handed to zil_replay()
 * by zvol_create_minor().
 */
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
	zvol_replay_err,	/* 0 no such transaction type */
	zvol_replay_err,	/* TX_CREATE */
	zvol_replay_err,	/* TX_MKDIR */
	zvol_replay_err,	/* TX_MKXATTR */
	zvol_replay_err,	/* TX_SYMLINK */
	zvol_replay_err,	/* TX_REMOVE */
	zvol_replay_err,	/* TX_RMDIR */
	zvol_replay_err,	/* TX_LINK */
	zvol_replay_err,	/* TX_RENAME */
	zvol_replay_write,	/* TX_WRITE */
	zvol_replay_err,	/* TX_TRUNCATE */
	zvol_replay_err,	/* TX_SETATTR */
	zvol_replay_err,	/* TX_ACL */
};
|
282 |
||
283 |
/*
 * Create a minor node for the specified volume.
 *
 * zc->zc_name names the volume; zc->zc_dev supplies the device whose
 * major number is used when the size properties are published.
 *
 * Returns 0 on success, EEXIST if the volume already has a minor,
 * ENXIO if no minor number is free, EAGAIN if the soft state or either
 * minor node cannot be created, or the error from dmu_objset_open() /
 * zap_lookup().  zvol_state_lock is held for the whole operation.
 */
int
zvol_create_minor(zfs_cmd_t *zc)
{
	char *name = zc->zc_name;
	dev_t dev = zc->zc_dev;
	zvol_state_t *zv;
	objset_t *os;
	uint64_t volsize;
	minor_t minor = 0;
	struct pathname linkpath;
	int ds_mode = DS_MODE_PRIMARY;
	vnode_t *vp = NULL;
	char *devpath;
	/* "<dir>/<name>\0": one byte for the slash, one for the NUL. */
	size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + 1 + strlen(name) + 1;
	char chrbuf[30], blkbuf[30];
	int error;

	mutex_enter(&zvol_state_lock);

	if ((zv = zvol_minor_lookup(name)) != NULL) {
		mutex_exit(&zvol_state_lock);
		return (EEXIST);
	}

	/* Names containing '@' (presumably snapshots) are opened read-only. */
	if (strchr(name, '@') != 0)
		ds_mode |= DS_MODE_READONLY;

	error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);

	if (error) {
		mutex_exit(&zvol_state_lock);
		return (error);
	}

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);

	if (error) {
		dmu_objset_close(os);
		mutex_exit(&zvol_state_lock);
		return (error);
	}

	/*
	 * If there's an existing /dev/zvol symlink, try to use the
	 * same minor number we used last time.
	 */
	devpath = kmem_alloc(devpathlen, KM_SLEEP);

	(void) sprintf(devpath, "%s/%s", ZVOL_FULL_DEV_DIR, name);

	error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp);

	kmem_free(devpath, devpathlen);

	/* Anything other than a symlink is treated like a failed lookup. */
	if (error == 0 && vp->v_type != VLNK)
		error = EINVAL;

	if (error == 0) {
		pn_alloc(&linkpath);
		error = pn_getsymlink(vp, &linkpath, kcred);
		if (error == 0) {
			/* The minor number follows ZVOL_PSEUDO_DEV in the link. */
			char *ms = strstr(linkpath.pn_path, ZVOL_PSEUDO_DEV);
			if (ms != NULL) {
				ms += strlen(ZVOL_PSEUDO_DEV);
				minor = stoi(&ms);
			}
		}
		pn_free(&linkpath);
	}

	/*
	 * 'error' from the symlink probe is deliberately not returned;
	 * a failed probe just leaves minor == 0 so a fresh one is chosen.
	 */
	if (vp != NULL)
		VN_RELE(vp);

	/*
	 * If we found a minor but it's already in use, we must pick a new one.
	 */
	if (minor != 0 && ddi_get_soft_state(zvol_state, minor) != NULL)
		minor = 0;

	if (minor == 0)
		minor = zvol_minor_alloc();

	if (minor == 0) {
		dmu_objset_close(os);
		mutex_exit(&zvol_state_lock);
		return (ENXIO);
	}

	if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) {
		dmu_objset_close(os);
		mutex_exit(&zvol_state_lock);
		return (EAGAIN);
	}

	/*
	 * NOTE(review): 'minor' is passed here where zvol_size_changed()
	 * passes a dev_t built with makedevice() to the sibling
	 * ddi_prop_update_int64() -- confirm this is intended.  The
	 * result is ignored, and the failure paths below do not undo it.
	 */
	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, name);

	/* Character ("raw") minor node. */
	(void) sprintf(chrbuf, "%uc,raw", minor);

	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		ddi_soft_state_free(zvol_state, minor);
		dmu_objset_close(os);
		mutex_exit(&zvol_state_lock);
		return (EAGAIN);
	}

	/* Block minor node; on failure the character node is removed too. */
	(void) sprintf(blkbuf, "%uc", minor);

	if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		ddi_remove_minor_node(zfs_dip, chrbuf);
		ddi_soft_state_free(zvol_state, minor);
		dmu_objset_close(os);
		mutex_exit(&zvol_state_lock);
		return (EAGAIN);
	}

	/* From here on nothing is unwound; remaining failures VERIFY(). */
	zv = ddi_get_soft_state(zvol_state, minor);

	(void) strcpy(zv->zv_name, name);
	zv->zv_min_bs = DEV_BSHIFT;
	zv->zv_minor = minor;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;
	zv->zv_mode = ds_mode;
	zv->zv_zilog = zil_open(os, NULL);

	rw_init(&zv->zv_dslock, NULL, RW_DEFAULT, NULL);

	/* Replay outstanding log records via zvol_replay_vector. */
	zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector, NULL);

	zvol_size_changed(zv, dev);

	/* XXX this should handle the possible i/o error */
	VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
	    "readonly", zvol_readonly_changed_cb, zv) == 0);

	zvol_minors++;

	mutex_exit(&zvol_state_lock);

	return (0);
}
|
429 |
||
430 |
/* |
|
431 |
* Remove minor node for the specified volume. |
|
432 |
*/ |
|
433 |
int |
|
434 |
zvol_remove_minor(zfs_cmd_t *zc) |
|
435 |
{ |
|
436 |
zvol_state_t *zv; |
|
437 |
char namebuf[30]; |
|
438 |
||
439 |
mutex_enter(&zvol_state_lock); |
|
440 |
||
441 |
if ((zv = zvol_minor_lookup(zc->zc_name)) == NULL) { |
|
442 |
mutex_exit(&zvol_state_lock); |
|
443 |
return (ENXIO); |
|
444 |
} |
|
445 |
||
446 |
if (zv->zv_total_opens != 0) { |
|
447 |
mutex_exit(&zvol_state_lock); |
|
448 |
return (EBUSY); |
|
449 |
} |
|
450 |
||
451 |
(void) sprintf(namebuf, "%uc,raw", zv->zv_minor); |
|
452 |
ddi_remove_minor_node(zfs_dip, namebuf); |
|
453 |
||
454 |
(void) sprintf(namebuf, "%uc", zv->zv_minor); |
|
455 |
ddi_remove_minor_node(zfs_dip, namebuf); |
|
456 |
||
457 |
VERIFY(dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), |
|
458 |
"readonly", zvol_readonly_changed_cb, zv) == 0); |
|
459 |
||
1141 | 460 |
zil_close(zv->zv_zilog); |
461 |
zv->zv_zilog = NULL; |
|
789 | 462 |
dmu_objset_close(zv->zv_objset); |
463 |
zv->zv_objset = NULL; |
|
464 |
||
465 |
ddi_soft_state_free(zvol_state, zv->zv_minor); |
|
466 |
||
467 |
zvol_minors--; |
|
468 |
||
469 |
mutex_exit(&zvol_state_lock); |
|
470 |
||
471 |
return (0); |
|
472 |
} |
|
473 |
||
474 |
int |
|
475 |
zvol_set_volsize(zfs_cmd_t *zc) |
|
476 |
{ |
|
477 |
zvol_state_t *zv; |
|
478 |
dev_t dev = zc->zc_dev; |
|
479 |
dmu_tx_t *tx; |
|
480 |
int error; |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
481 |
dmu_object_info_t doi; |
789 | 482 |
|
483 |
mutex_enter(&zvol_state_lock); |
|
484 |
||
485 |
if ((zv = zvol_minor_lookup(zc->zc_name)) == NULL) { |
|
486 |
mutex_exit(&zvol_state_lock); |
|
487 |
return (ENXIO); |
|
488 |
} |
|
489 |
||
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
490 |
if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
491 |
(error = zvol_check_volsize(zc, doi.doi_data_block_size)) != 0) { |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
492 |
mutex_exit(&zvol_state_lock); |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
493 |
return (error); |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
494 |
} |
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
495 |
|
789 | 496 |
if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) { |
497 |
mutex_exit(&zvol_state_lock); |
|
498 |
return (EROFS); |
|
499 |
} |
|
500 |
||
501 |
tx = dmu_tx_create(zv->zv_objset); |
|
1544 | 502 |
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); |
789 | 503 |
dmu_tx_hold_free(tx, ZVOL_OBJ, zc->zc_volsize, DMU_OBJECT_END); |
504 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
505 |
if (error) { |
|
506 |
dmu_tx_abort(tx); |
|
507 |
mutex_exit(&zvol_state_lock); |
|
508 |
return (error); |
|
509 |
} |
|
510 |
||
511 |
error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, |
|
512 |
&zc->zc_volsize, tx); |
|
1544 | 513 |
if (error == 0) { |
514 |
error = dmu_free_range(zv->zv_objset, ZVOL_OBJ, zc->zc_volsize, |
|
789 | 515 |
DMU_OBJECT_END, tx); |
1544 | 516 |
} |
789 | 517 |
|
518 |
dmu_tx_commit(tx); |
|
519 |
||
520 |
if (error == 0) { |
|
521 |
zv->zv_volsize = zc->zc_volsize; |
|
522 |
zvol_size_changed(zv, dev); |
|
523 |
} |
|
524 |
||
525 |
mutex_exit(&zvol_state_lock); |
|
526 |
||
527 |
return (error); |
|
528 |
} |
|
529 |
||
530 |
int |
|
531 |
zvol_set_volblocksize(zfs_cmd_t *zc) |
|
532 |
{ |
|
533 |
zvol_state_t *zv; |
|
534 |
dmu_tx_t *tx; |
|
535 |
int error; |
|
536 |
||
537 |
mutex_enter(&zvol_state_lock); |
|
538 |
||
539 |
if ((zv = zvol_minor_lookup(zc->zc_name)) == NULL) { |
|
540 |
mutex_exit(&zvol_state_lock); |
|
541 |
return (ENXIO); |
|
542 |
} |
|
543 |
||
544 |
if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) { |
|
545 |
mutex_exit(&zvol_state_lock); |
|
546 |
return (EROFS); |
|
547 |
} |
|
548 |
||
549 |
tx = dmu_tx_create(zv->zv_objset); |
|
550 |
dmu_tx_hold_bonus(tx, ZVOL_OBJ); |
|
551 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
552 |
if (error) { |
|
553 |
dmu_tx_abort(tx); |
|
554 |
} else { |
|
555 |
error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ, |
|
556 |
zc->zc_volblocksize, 0, tx); |
|
557 |
if (error == ENOTSUP) |
|
558 |
error = EBUSY; |
|
559 |
dmu_tx_commit(tx); |
|
560 |
} |
|
561 |
||
562 |
mutex_exit(&zvol_state_lock); |
|
563 |
||
564 |
return (error); |
|
565 |
} |
|
566 |
||
567 |
/*ARGSUSED*/ |
|
568 |
int |
|
569 |
zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) |
|
570 |
{ |
|
571 |
minor_t minor = getminor(*devp); |
|
572 |
zvol_state_t *zv; |
|
573 |
||
574 |
if (minor == 0) /* This is the control device */ |
|
575 |
return (0); |
|
576 |
||
577 |
mutex_enter(&zvol_state_lock); |
|
578 |
||
579 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
580 |
if (zv == NULL) { |
|
581 |
mutex_exit(&zvol_state_lock); |
|
582 |
return (ENXIO); |
|
583 |
} |
|
584 |
||
585 |
ASSERT(zv->zv_objset != NULL); |
|
586 |
||
587 |
if ((flag & FWRITE) && |
|
588 |
(zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY))) { |
|
589 |
mutex_exit(&zvol_state_lock); |
|
590 |
return (EROFS); |
|
591 |
} |
|
592 |
||
593 |
if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { |
|
594 |
zv->zv_open_count[otyp]++; |
|
595 |
zv->zv_total_opens++; |
|
596 |
} |
|
597 |
||
598 |
mutex_exit(&zvol_state_lock); |
|
599 |
||
600 |
return (0); |
|
601 |
} |
|
602 |
||
603 |
/*ARGSUSED*/ |
|
604 |
int |
|
605 |
zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) |
|
606 |
{ |
|
607 |
minor_t minor = getminor(dev); |
|
608 |
zvol_state_t *zv; |
|
609 |
||
610 |
if (minor == 0) /* This is the control device */ |
|
611 |
return (0); |
|
612 |
||
613 |
mutex_enter(&zvol_state_lock); |
|
614 |
||
615 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
616 |
if (zv == NULL) { |
|
617 |
mutex_exit(&zvol_state_lock); |
|
618 |
return (ENXIO); |
|
619 |
} |
|
620 |
||
621 |
/* |
|
622 |
* The next statement is a workaround for the following DDI bug: |
|
623 |
* 6343604 specfs race: multiple "last-close" of the same device |
|
624 |
*/ |
|
625 |
if (zv->zv_total_opens == 0) { |
|
626 |
mutex_exit(&zvol_state_lock); |
|
627 |
return (0); |
|
628 |
} |
|
629 |
||
630 |
/* |
|
631 |
* If the open count is zero, this is a spurious close. |
|
632 |
* That indicates a bug in the kernel / DDI framework. |
|
633 |
*/ |
|
634 |
ASSERT(zv->zv_open_count[otyp] != 0); |
|
635 |
ASSERT(zv->zv_total_opens != 0); |
|
636 |
||
637 |
/* |
|
638 |
* You may get multiple opens, but only one close. |
|
639 |
*/ |
|
640 |
zv->zv_open_count[otyp]--; |
|
641 |
zv->zv_total_opens--; |
|
642 |
||
643 |
mutex_exit(&zvol_state_lock); |
|
644 |
||
645 |
return (0); |
|
646 |
} |
|
647 |
||
1141 | 648 |
/* |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
649 |
* Create and return an immediate write ZIL transaction. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
650 |
*/ |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
651 |
itx_t * |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
652 |
zvol_immediate_itx(offset_t off, ssize_t len, char *addr) |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
653 |
{ |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
654 |
itx_t *itx; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
655 |
lr_write_t *lr; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
656 |
|
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
657 |
itx = zil_itx_create(TX_WRITE, sizeof (*lr) + len); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
658 |
lr = (lr_write_t *)&itx->itx_lr; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
659 |
lr->lr_foid = ZVOL_OBJ; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
660 |
lr->lr_offset = off; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
661 |
lr->lr_length = len; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
662 |
lr->lr_blkoff = 0; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
663 |
BP_ZERO(&lr->lr_blkptr); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
664 |
bcopy(addr, (char *)itx + offsetof(itx_t, itx_lr) + |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
665 |
sizeof (*lr), len); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
666 |
itx->itx_wr_state = WR_COPIED; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
667 |
return (itx); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
668 |
} |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
669 |
|
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
670 |
/* |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
671 |
* zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. |
1141 | 672 |
* |
673 |
* We store data in the log buffers if it's small enough. |
|
674 |
* Otherwise we flush the data out via dmu_sync(). |
|
675 |
*/ |
|
676 |
ssize_t zvol_immediate_write_sz = 65536; |
|
677 |
||
678 |
int |
|
679 |
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len, |
|
680 |
char *addr) |
|
681 |
{ |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
682 |
dmu_object_info_t doi; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
683 |
ssize_t nbytes; |
1141 | 684 |
itx_t *itx; |
685 |
lr_write_t *lr; |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
686 |
objset_t *os; |
2237 | 687 |
dmu_buf_t *db; |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
688 |
uint64_t txg; |
2237 | 689 |
uint64_t boff; |
1141 | 690 |
int error; |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
691 |
uint32_t blocksize; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
692 |
|
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
693 |
/* handle common case */ |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
694 |
if (len <= zvol_immediate_write_sz) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
695 |
itx = zvol_immediate_itx(off, len, addr); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
696 |
(void) zil_itx_assign(zv->zv_zilog, itx, tx); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
697 |
return (0); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
698 |
} |
1141 | 699 |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
700 |
txg = dmu_tx_get_txg(tx); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
701 |
os = zv->zv_objset; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
702 |
|
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
703 |
/* |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
704 |
* We need to dmu_sync() each block in the range. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
705 |
* For this we need the blocksize. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
706 |
*/ |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
707 |
error = dmu_object_info(os, ZVOL_OBJ, &doi); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
708 |
if (error) |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
709 |
return (error); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
710 |
blocksize = doi.doi_data_block_size; |
1141 | 711 |
|
712 |
/* |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
713 |
* We need to immediate write or dmu_sync() each block in the range. |
1141 | 714 |
*/ |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
715 |
while (len) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
716 |
nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
717 |
if (nbytes <= zvol_immediate_write_sz) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
718 |
itx = zvol_immediate_itx(off, nbytes, addr); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
719 |
} else { |
2237 | 720 |
boff = P2ALIGN_TYPED(off, blocksize, uint64_t); |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
721 |
itx = zil_itx_create(TX_WRITE, sizeof (*lr)); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
722 |
lr = (lr_write_t *)&itx->itx_lr; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
723 |
lr->lr_foid = ZVOL_OBJ; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
724 |
lr->lr_offset = off; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
725 |
lr->lr_length = nbytes; |
2237 | 726 |
lr->lr_blkoff = off - boff; |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
727 |
BP_ZERO(&lr->lr_blkptr); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
728 |
|
2237 | 729 |
/* XXX - we should do these IOs in parallel */ |
730 |
VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, boff, |
|
731 |
FTAG, &db)); |
|
732 |
ASSERT(boff == db->db_offset); |
|
733 |
error = dmu_sync(NULL, db, &lr->lr_blkptr, |
|
734 |
txg, NULL, NULL); |
|
735 |
dmu_buf_rele(db, FTAG); |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
736 |
if (error) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
737 |
kmem_free(itx, offsetof(itx_t, itx_lr)); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
738 |
return (error); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
739 |
} |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
740 |
itx->itx_wr_state = WR_COPIED; |
1141 | 741 |
} |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
742 |
(void) zil_itx_assign(zv->zv_zilog, itx, tx); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
743 |
len -= nbytes; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
744 |
off += nbytes; |
1141 | 745 |
} |
746 |
return (0); |
|
747 |
} |
|
748 |
||
789 | 749 |
int |
750 |
zvol_strategy(buf_t *bp) |
|
751 |
{ |
|
752 |
zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); |
|
753 |
uint64_t off, volsize; |
|
754 |
size_t size, resid; |
|
755 |
char *addr; |
|
1141 | 756 |
objset_t *os; |
789 | 757 |
int error = 0; |
1141 | 758 |
int sync; |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
759 |
int reading; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
760 |
int txg_sync_needed = B_FALSE; |
789 | 761 |
|
762 |
if (zv == NULL) { |
|
763 |
bioerror(bp, ENXIO); |
|
764 |
biodone(bp); |
|
765 |
return (0); |
|
766 |
} |
|
767 |
||
768 |
if (getminor(bp->b_edev) == 0) { |
|
769 |
bioerror(bp, EINVAL); |
|
770 |
biodone(bp); |
|
771 |
return (0); |
|
772 |
} |
|
773 |
||
774 |
if (zv->zv_readonly && !(bp->b_flags & B_READ)) { |
|
775 |
bioerror(bp, EROFS); |
|
776 |
biodone(bp); |
|
777 |
return (0); |
|
778 |
} |
|
779 |
||
780 |
off = ldbtob(bp->b_blkno); |
|
781 |
volsize = zv->zv_volsize; |
|
782 |
||
1141 | 783 |
os = zv->zv_objset; |
784 |
ASSERT(os != NULL); |
|
785 |
sync = !(bp->b_flags & B_ASYNC) && !(zil_disable); |
|
789 | 786 |
|
787 |
bp_mapin(bp); |
|
788 |
addr = bp->b_un.b_addr; |
|
789 |
resid = bp->b_bcount; |
|
790 |
||
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
791 |
/* |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
792 |
* There must be no buffer changes when doing a dmu_sync() because |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
793 |
* we can't change the data whilst calculating the checksum. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
794 |
* A better approach than a per zvol rwlock would be to lock ranges. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
795 |
*/ |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
796 |
reading = bp->b_flags & B_READ; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
797 |
if (reading || resid <= zvol_immediate_write_sz) |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
798 |
rw_enter(&zv->zv_dslock, RW_READER); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
799 |
else |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
800 |
rw_enter(&zv->zv_dslock, RW_WRITER); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
801 |
|
789 | 802 |
while (resid != 0 && off < volsize) { |
803 |
||
804 |
size = MIN(resid, 1UL << 20); /* cap at 1MB per tx */ |
|
805 |
||
806 |
if (size > volsize - off) /* don't write past the end */ |
|
807 |
size = volsize - off; |
|
808 |
||
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
809 |
if (reading) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
810 |
error = dmu_read(os, ZVOL_OBJ, off, size, addr); |
789 | 811 |
} else { |
1141 | 812 |
dmu_tx_t *tx = dmu_tx_create(os); |
789 | 813 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); |
814 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
815 |
if (error) { |
|
816 |
dmu_tx_abort(tx); |
|
817 |
} else { |
|
1141 | 818 |
dmu_write(os, ZVOL_OBJ, off, size, addr, tx); |
819 |
if (sync) { |
|
820 |
/* use the ZIL to commit this write */ |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
821 |
if (zvol_log_write(zv, tx, off, size, |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
822 |
addr) != 0) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
823 |
txg_sync_needed = B_TRUE; |
1141 | 824 |
} |
825 |
} |
|
789 | 826 |
dmu_tx_commit(tx); |
827 |
} |
|
828 |
} |
|
829 |
if (error) |
|
830 |
break; |
|
831 |
off += size; |
|
832 |
addr += size; |
|
833 |
resid -= size; |
|
834 |
} |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
835 |
rw_exit(&zv->zv_dslock); |
789 | 836 |
|
837 |
if ((bp->b_resid = resid) == bp->b_bcount) |
|
838 |
bioerror(bp, off > volsize ? EINVAL : error); |
|
839 |
||
840 |
biodone(bp); |
|
1141 | 841 |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
842 |
if (sync) { |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
843 |
if (txg_sync_needed) |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
844 |
txg_wait_synced(dmu_objset_pool(os), 0); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
845 |
else |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
846 |
zil_commit(zv->zv_zilog, UINT64_MAX, FDSYNC); |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
847 |
} |
1141 | 848 |
|
789 | 849 |
return (0); |
850 |
} |
|
851 |
||
852 |
/*ARGSUSED*/ |
|
853 |
int |
|
854 |
zvol_read(dev_t dev, uio_t *uiop, cred_t *cr) |
|
855 |
{ |
|
856 |
return (physio(zvol_strategy, NULL, dev, B_READ, minphys, uiop)); |
|
857 |
} |
|
858 |
||
859 |
/*ARGSUSED*/ |
|
860 |
int |
|
861 |
zvol_write(dev_t dev, uio_t *uiop, cred_t *cr) |
|
862 |
{ |
|
863 |
return (physio(zvol_strategy, NULL, dev, B_WRITE, minphys, uiop)); |
|
864 |
} |
|
865 |
||
866 |
/*ARGSUSED*/ |
|
867 |
int |
|
868 |
zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr) |
|
869 |
{ |
|
870 |
return (aphysio(zvol_strategy, anocancel, dev, B_READ, minphys, aio)); |
|
871 |
} |
|
872 |
||
873 |
/*ARGSUSED*/ |
|
874 |
int |
|
875 |
zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr) |
|
876 |
{ |
|
877 |
return (aphysio(zvol_strategy, anocancel, dev, B_WRITE, minphys, aio)); |
|
878 |
} |
|
879 |
||
880 |
/* |
|
881 |
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). |
|
882 |
*/ |
|
883 |
/*ARGSUSED*/ |
|
884 |
int |
|
885 |
zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) |
|
886 |
{ |
|
887 |
zvol_state_t *zv; |
|
888 |
struct dk_cinfo dkc; |
|
889 |
struct dk_minfo dkm; |
|
890 |
dk_efi_t efi; |
|
891 |
efi_gpt_t gpt; |
|
892 |
efi_gpe_t gpe; |
|
893 |
struct uuid uuid = EFI_RESERVED; |
|
894 |
uint32_t crc; |
|
895 |
int error = 0; |
|
896 |
||
897 |
mutex_enter(&zvol_state_lock); |
|
898 |
||
899 |
zv = ddi_get_soft_state(zvol_state, getminor(dev)); |
|
900 |
||
901 |
if (zv == NULL) { |
|
902 |
mutex_exit(&zvol_state_lock); |
|
903 |
return (ENXIO); |
|
904 |
} |
|
905 |
||
906 |
switch (cmd) { |
|
907 |
||
908 |
case DKIOCINFO: |
|
909 |
bzero(&dkc, sizeof (dkc)); |
|
910 |
(void) strcpy(dkc.dki_cname, "zvol"); |
|
911 |
(void) strcpy(dkc.dki_dname, "zvol"); |
|
912 |
dkc.dki_ctype = DKC_UNKNOWN; |
|
1357
46e683ebd8ea
6367517 32-bit kernel VA exhaustion with logging ufs atop zvol
bonwick
parents:
1356
diff
changeset
|
913 |
dkc.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); |
789 | 914 |
mutex_exit(&zvol_state_lock); |
915 |
if (ddi_copyout(&dkc, (void *)arg, sizeof (dkc), flag)) |
|
916 |
error = EFAULT; |
|
917 |
return (error); |
|
918 |
||
919 |
case DKIOCGMEDIAINFO: |
|
920 |
bzero(&dkm, sizeof (dkm)); |
|
921 |
dkm.dki_lbsize = 1U << zv->zv_min_bs; |
|
922 |
dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; |
|
923 |
dkm.dki_media_type = DK_UNKNOWN; |
|
924 |
mutex_exit(&zvol_state_lock); |
|
925 |
if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) |
|
926 |
error = EFAULT; |
|
927 |
return (error); |
|
928 |
||
929 |
case DKIOCGETEFI: |
|
930 |
if (ddi_copyin((void *)arg, &efi, sizeof (dk_efi_t), flag)) { |
|
931 |
mutex_exit(&zvol_state_lock); |
|
932 |
return (EFAULT); |
|
933 |
} |
|
934 |
||
935 |
bzero(&gpt, sizeof (gpt)); |
|
936 |
bzero(&gpe, sizeof (gpe)); |
|
937 |
||
938 |
efi.dki_data = (void *)(uintptr_t)efi.dki_data_64; |
|
939 |
||
940 |
if (efi.dki_length < sizeof (gpt) + sizeof (gpe)) { |
|
941 |
mutex_exit(&zvol_state_lock); |
|
942 |
return (EINVAL); |
|
943 |
} |
|
944 |
||
945 |
efi.dki_length = sizeof (gpt) + sizeof (gpe); |
|
946 |
||
947 |
gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
948 |
gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); |
789 | 949 |
gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); |
950 |
gpt.efi_gpt_FirstUsableLBA = LE_64(0ULL); |
|
951 |
gpt.efi_gpt_LastUsableLBA = |
|
952 |
LE_64((zv->zv_volsize >> zv->zv_min_bs) - 1); |
|
953 |
gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); |
|
954 |
gpt.efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (gpe)); |
|
955 |
||
956 |
UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); |
|
957 |
gpe.efi_gpe_StartingLBA = gpt.efi_gpt_FirstUsableLBA; |
|
958 |
gpe.efi_gpe_EndingLBA = gpt.efi_gpt_LastUsableLBA; |
|
959 |
||
960 |
CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); |
|
961 |
gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); |
|
962 |
||
963 |
CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); |
|
964 |
gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); |
|
965 |
||
966 |
mutex_exit(&zvol_state_lock); |
|
967 |
if (ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), flag) || |
|
968 |
ddi_copyout(&gpe, efi.dki_data + 1, sizeof (gpe), flag)) |
|
969 |
error = EFAULT; |
|
970 |
return (error); |
|
971 |
||
972 |
default: |
|
973 |
error = ENOTSUP; |
|
974 |
break; |
|
975 |
||
976 |
} |
|
977 |
mutex_exit(&zvol_state_lock); |
|
978 |
return (error); |
|
979 |
} |
|
980 |
||
981 |
int |
|
982 |
zvol_busy(void) |
|
983 |
{ |
|
984 |
return (zvol_minors != 0); |
|
985 |
} |
|
986 |
||
987 |
void |
|
988 |
zvol_init(void) |
|
989 |
{ |
|
990 |
VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0); |
|
991 |
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); |
|
992 |
} |
|
993 |
||
994 |
void |
|
995 |
zvol_fini(void) |
|
996 |
{ |
|
997 |
mutex_destroy(&zvol_state_lock); |
|
998 |
ddi_soft_state_fini(&zvol_state); |
|
999 |
} |