author | Neil Perrin <Neil.Perrin@Sun.COM> |
Tue, 25 Nov 2008 13:18:25 -0700 | |
changeset 8227 | f7d7be9b1f56 |
parent 7903 | 4c8fa38f91ec |
child 8524 | a56dffa8fba9 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
6423 | 22 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
/* |
|
27 |
* ZFS volume emulation driver. |
|
28 |
* |
|
29 |
* Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. |
|
30 |
* Volumes are accessed through the symbolic links named: |
|
31 |
* |
|
32 |
* /dev/zvol/dsk/<pool_name>/<dataset_name> |
|
33 |
* /dev/zvol/rdsk/<pool_name>/<dataset_name> |
|
34 |
* |
|
35 |
* These links are created by the ZFS-specific devfsadm link generator. |
|
36 |
* Volumes are persistent through reboot. No user command needs to be |
|
37 |
* run before opening and using a device. |
|
38 |
*/ |
|
39 |
||
40 |
#include <sys/types.h> |
|
41 |
#include <sys/param.h> |
|
42 |
#include <sys/errno.h> |
|
43 |
#include <sys/uio.h> |
|
44 |
#include <sys/buf.h> |
|
45 |
#include <sys/modctl.h> |
|
46 |
#include <sys/open.h> |
|
47 |
#include <sys/kmem.h> |
|
48 |
#include <sys/conf.h> |
|
49 |
#include <sys/cmn_err.h> |
|
50 |
#include <sys/stat.h> |
|
51 |
#include <sys/zap.h> |
|
52 |
#include <sys/spa.h> |
|
53 |
#include <sys/zio.h> |
|
6423 | 54 |
#include <sys/dmu_traverse.h> |
55 |
#include <sys/dnode.h> |
|
56 |
#include <sys/dsl_dataset.h> |
|
789 | 57 |
#include <sys/dsl_prop.h> |
58 |
#include <sys/dkio.h> |
|
59 |
#include <sys/efi_partition.h> |
|
60 |
#include <sys/byteorder.h> |
|
61 |
#include <sys/pathname.h> |
|
62 |
#include <sys/ddi.h> |
|
63 |
#include <sys/sunddi.h> |
|
64 |
#include <sys/crc32.h> |
|
65 |
#include <sys/dirent.h> |
|
66 |
#include <sys/policy.h> |
|
67 |
#include <sys/fs/zfs.h> |
|
68 |
#include <sys/zfs_ioctl.h> |
|
69 |
#include <sys/mkdev.h> |
|
1141 | 70 |
#include <sys/zil.h> |
2237 | 71 |
#include <sys/refcount.h> |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
72 |
#include <sys/zfs_znode.h> |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
73 |
#include <sys/zfs_rlock.h> |
6423 | 74 |
#include <sys/vdev_disk.h> |
75 |
#include <sys/vdev_impl.h> |
|
76 |
#include <sys/zvol.h> |
|
77 |
#include <sys/dumphdr.h> |
|
8227 | 78 |
#include <sys/zil_impl.h> |
789 | 79 |
|
80 |
#include "zfs_namecheck.h" |
|
81 |
||
6423 | 82 |
static void *zvol_state; |
789 | 83 |
|
6423 | 84 |
#define ZVOL_DUMPSIZE "dumpsize" |
789 | 85 |
|
86 |
/* |
|
87 |
* This lock protects the zvol_state structure from being modified |
|
88 |
* while it's being used, e.g. an open that comes in before a create |
|
89 |
* finishes. It also protects temporary opens of the dataset so that, |
|
90 |
* e.g., an open doesn't get a spurious EBUSY. |
|
91 |
*/ |
|
92 |
static kmutex_t zvol_state_lock; |
|
93 |
static uint32_t zvol_minors; |
|
94 |
||
6423 | 95 |
/*
 * One run of physically contiguous volume blocks, kept on the per-volume
 * zv_extents list.  zvol_map_block() either grows the tail extent or
 * appends a new one for each level-0 block it visits.
 */
typedef struct zvol_extent {
	list_node_t	ze_node;	/* linkage on zvol_state_t.zv_extents */
	dva_t		ze_dva;		/* dva associated with this extent */
	uint64_t	ze_nblks;	/* number of blocks in extent */
} zvol_extent_t;
100 |
||
101 |
/*
 * The in-core state of each volume.
 *
 * One of these is allocated as DDI soft state per minor number by
 * zvol_create_minor() and released by zvol_remove_minor().
 */
typedef struct zvol_state {
	char		zv_name[MAXPATHLEN]; /* pool/dd name */
	uint64_t	zv_volsize;	/* amount of space we advertise */
	uint64_t	zv_volblocksize; /* volume block size */
	minor_t		zv_minor;	/* minor number */
	uint8_t		zv_min_bs;	/* minimum addressable block shift */
	uint8_t		zv_flags;	/* readonly; dumpified (ZVOL_* flags) */
	objset_t	*zv_objset;	/* objset handle */
	uint32_t	zv_mode;	/* DS_MODE_* flags at open time */
	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
	uint32_t	zv_total_opens;	/* total open count */
	zilog_t		*zv_zilog;	/* ZIL handle */
	list_t		zv_extents;	/* List of extents for dump */
	znode_t		zv_znode;	/* for range locking */
} zvol_state_t;
119 |
||
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
120 |
/* |
6423 | 121 |
* zvol specific flags |
122 |
*/ |
|
123 |
#define ZVOL_RDONLY 0x1 |
|
124 |
#define ZVOL_DUMPIFIED 0x2 |
|
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
125 |
#define ZVOL_EXCL 0x4 |
6423 | 126 |
|
127 |
/* |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
128 |
* zvol maximum transfer in one DMU tx. |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
129 |
*/ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
130 |
int zvol_maxphys = DMU_MAX_ACCESS/2; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
131 |
|
6423 | 132 |
extern int zfs_set_prop_nvlist(const char *, nvlist_t *); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
133 |
static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); |
6423 | 134 |
static int zvol_dumpify(zvol_state_t *zv); |
135 |
static int zvol_dump_fini(zvol_state_t *zv); |
|
136 |
static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
137 |
|
789 | 138 |
static void |
4787 | 139 |
zvol_size_changed(zvol_state_t *zv, major_t maj) |
789 | 140 |
{ |
4787 | 141 |
dev_t dev = makedevice(maj, zv->zv_minor); |
789 | 142 |
|
143 |
VERIFY(ddi_prop_update_int64(dev, zfs_dip, |
|
144 |
"Size", zv->zv_volsize) == DDI_SUCCESS); |
|
145 |
VERIFY(ddi_prop_update_int64(dev, zfs_dip, |
|
146 |
"Nblocks", lbtodb(zv->zv_volsize)) == DDI_SUCCESS); |
|
6423 | 147 |
|
148 |
/* Notify specfs to invalidate the cached size */ |
|
149 |
spec_size_invalidate(dev, VBLK); |
|
150 |
spec_size_invalidate(dev, VCHR); |
|
789 | 151 |
} |
152 |
||
153 |
/*
 * Validate a proposed volume size against a block size.
 *
 * Returns:
 *	0		the size is acceptable
 *	EINVAL		volsize or blocksize is zero, or volsize is not a
 *			multiple of blocksize
 *	EOVERFLOW	(_ILP32 builds only) volsize - 1 exceeds
 *			SPEC_MAXOFFSET_T
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
	/* A zero blocksize would otherwise divide by zero below. */
	if (volsize == 0 || blocksize == 0)
		return (EINVAL);

	if (volsize % blocksize != 0)
		return (EINVAL);

#ifdef _ILP32
	if (volsize - 1 > SPEC_MAXOFFSET_T)
		return (EOVERFLOW);
#endif
	return (0);
}
|
168 |
||
169 |
int |
|
2676 | 170 |
zvol_check_volblocksize(uint64_t volblocksize) |
789 | 171 |
{ |
2676 | 172 |
if (volblocksize < SPA_MINBLOCKSIZE || |
173 |
volblocksize > SPA_MAXBLOCKSIZE || |
|
174 |
!ISP2(volblocksize)) |
|
789 | 175 |
return (EDOM); |
176 |
||
177 |
return (0); |
|
178 |
} |
|
179 |
||
180 |
static void |
|
181 |
zvol_readonly_changed_cb(void *arg, uint64_t newval) |
|
182 |
{ |
|
183 |
zvol_state_t *zv = arg; |
|
184 |
||
6423 | 185 |
if (newval) |
186 |
zv->zv_flags |= ZVOL_RDONLY; |
|
187 |
else |
|
188 |
zv->zv_flags &= ~ZVOL_RDONLY; |
|
789 | 189 |
} |
190 |
||
191 |
int |
|
2885 | 192 |
zvol_get_stats(objset_t *os, nvlist_t *nv) |
789 | 193 |
{ |
194 |
int error; |
|
195 |
dmu_object_info_t doi; |
|
2885 | 196 |
uint64_t val; |
789 | 197 |
|
198 |
||
2885 | 199 |
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); |
789 | 200 |
if (error) |
201 |
return (error); |
|
202 |
||
2885 | 203 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); |
204 |
||
789 | 205 |
error = dmu_object_info(os, ZVOL_OBJ, &doi); |
206 |
||
2885 | 207 |
if (error == 0) { |
208 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, |
|
209 |
doi.doi_data_block_size); |
|
210 |
} |
|
789 | 211 |
|
212 |
return (error); |
|
213 |
} |
|
214 |
||
215 |
/* |
|
216 |
* Find a free minor number. |
|
217 |
*/ |
|
218 |
static minor_t |
|
219 |
zvol_minor_alloc(void) |
|
220 |
{ |
|
221 |
minor_t minor; |
|
222 |
||
223 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
224 |
||
225 |
for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) |
|
226 |
if (ddi_get_soft_state(zvol_state, minor) == NULL) |
|
227 |
return (minor); |
|
228 |
||
229 |
return (0); |
|
230 |
} |
|
231 |
||
232 |
static zvol_state_t * |
|
2676 | 233 |
zvol_minor_lookup(const char *name) |
789 | 234 |
{ |
235 |
minor_t minor; |
|
236 |
zvol_state_t *zv; |
|
237 |
||
238 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
239 |
||
240 |
for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { |
|
241 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
242 |
if (zv == NULL) |
|
243 |
continue; |
|
244 |
if (strcmp(zv->zv_name, name) == 0) |
|
245 |
break; |
|
246 |
} |
|
247 |
||
248 |
return (zv); |
|
249 |
} |
|
250 |
||
6423 | 251 |
/*
 * extent mapping arg: state handed to zvol_map_block() through
 * traverse_dataset() by zvol_get_lbas().
 */
struct maparg {
	zvol_state_t	*ma_zv;		/* volume whose extent list is built */
	uint64_t	ma_blks;	/* level-0 blocks of ZVOL_OBJ seen so far */
};
256 |
||
257 |
/*ARGSUSED*/ |
|
258 |
static int |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
259 |
zvol_map_block(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
260 |
const dnode_phys_t *dnp, void *arg) |
6423 | 261 |
{ |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
262 |
struct maparg *ma = arg; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
263 |
zvol_extent_t *ze; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
264 |
int bs = ma->ma_zv->zv_volblocksize; |
6423 | 265 |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
266 |
if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) |
6423 | 267 |
return (0); |
268 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
269 |
VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
270 |
ma->ma_blks++; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
271 |
|
6423 | 272 |
/* Abort immediately if we have encountered gang blocks */ |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
273 |
if (BP_IS_GANG(bp)) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
274 |
return (EFRAGS); |
6423 | 275 |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
276 |
/* |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
277 |
* See if the block is at the end of the previous extent. |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
278 |
*/ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
279 |
ze = list_tail(&ma->ma_zv->zv_extents); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
280 |
if (ze && |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
281 |
DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
282 |
DVA_GET_OFFSET(BP_IDENTITY(bp)) == |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
283 |
DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
284 |
ze->ze_nblks++; |
6423 | 285 |
return (0); |
286 |
} |
|
287 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
288 |
dprintf_bp(bp, "%s", "next blkptr:"); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
289 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
290 |
/* start a new extent */ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
291 |
ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
292 |
ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
293 |
ze->ze_nblks = 1; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
294 |
list_insert_tail(&ma->ma_zv->zv_extents, ze); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
295 |
return (0); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
296 |
} |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
297 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
298 |
static void |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
299 |
zvol_free_extents(zvol_state_t *zv) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
300 |
{ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
301 |
zvol_extent_t *ze; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
302 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
303 |
while (ze = list_head(&zv->zv_extents)) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
304 |
list_remove(&zv->zv_extents, ze); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
305 |
kmem_free(ze, sizeof (zvol_extent_t)); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
306 |
} |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
307 |
} |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
308 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
309 |
static int |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
310 |
zvol_get_lbas(zvol_state_t *zv) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
311 |
{ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
312 |
struct maparg ma; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
313 |
int err; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
314 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
315 |
ma.ma_zv = zv; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
316 |
ma.ma_blks = 0; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
317 |
zvol_free_extents(zv); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
318 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
319 |
err = traverse_dataset(dmu_objset_ds(zv->zv_objset), 0, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
320 |
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
321 |
if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
322 |
zvol_free_extents(zv); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
323 |
return (err ? err : EIO); |
6423 | 324 |
} |
325 |
||
326 |
return (0); |
|
327 |
} |
|
328 |
||
4543 | 329 |
/* ARGSUSED */ |
789 | 330 |
void |
4543 | 331 |
zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) |
789 | 332 |
{ |
5331 | 333 |
zfs_creat_t *zct = arg; |
334 |
nvlist_t *nvprops = zct->zct_props; |
|
789 | 335 |
int error; |
2676 | 336 |
uint64_t volblocksize, volsize; |
789 | 337 |
|
4543 | 338 |
VERIFY(nvlist_lookup_uint64(nvprops, |
2676 | 339 |
zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); |
4543 | 340 |
if (nvlist_lookup_uint64(nvprops, |
2676 | 341 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) |
342 |
volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); |
|
343 |
||
344 |
/* |
|
6423 | 345 |
* These properties must be removed from the list so the generic |
2676 | 346 |
* property setting step won't apply to them. |
347 |
*/ |
|
4543 | 348 |
VERIFY(nvlist_remove_all(nvprops, |
2676 | 349 |
zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); |
4543 | 350 |
(void) nvlist_remove_all(nvprops, |
2676 | 351 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); |
352 |
||
353 |
error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, |
|
789 | 354 |
DMU_OT_NONE, 0, tx); |
355 |
ASSERT(error == 0); |
|
356 |
||
357 |
error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, |
|
358 |
DMU_OT_NONE, 0, tx); |
|
359 |
ASSERT(error == 0); |
|
360 |
||
2676 | 361 |
error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); |
789 | 362 |
ASSERT(error == 0); |
363 |
} |
|
364 |
||
365 |
/* |
|
1141 | 366 |
* Replay a TX_WRITE ZIL transaction that didn't get committed |
367 |
* after a system failure |
|
368 |
*/ |
|
369 |
static int |
|
370 |
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) |
|
371 |
{ |
|
372 |
objset_t *os = zv->zv_objset; |
|
373 |
char *data = (char *)(lr + 1); /* data follows lr_write_t */ |
|
374 |
uint64_t off = lr->lr_offset; |
|
375 |
uint64_t len = lr->lr_length; |
|
376 |
dmu_tx_t *tx; |
|
377 |
int error; |
|
378 |
||
379 |
if (byteswap) |
|
380 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
381 |
||
382 |
tx = dmu_tx_create(os); |
|
383 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); |
|
8227 | 384 |
error = dmu_tx_assign(tx, TXG_WAIT); |
1141 | 385 |
if (error) { |
386 |
dmu_tx_abort(tx); |
|
387 |
} else { |
|
388 |
dmu_write(os, ZVOL_OBJ, off, len, data, tx); |
|
389 |
dmu_tx_commit(tx); |
|
390 |
} |
|
391 |
||
392 |
return (error); |
|
393 |
} |
|
394 |
||
395 |
/* ARGSUSED */ |
|
396 |
static int |
|
397 |
zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) |
|
398 |
{ |
|
399 |
return (ENOTSUP); |
|
400 |
} |
|
401 |
||
402 |
/*
 * Callback vectors for replaying records.
 * Only TX_WRITE is needed for zvol.
 *
 * The table is indexed by transaction type and handed to zil_replay() in
 * zvol_create_minor(); every listed slot other than TX_WRITE rejects the
 * record through zvol_replay_err().
 * NOTE(review): the array is sized TX_MAX_TYPE but only 13 slots are
 * initialized here; if TX_MAX_TYPE is larger, the remaining slots are
 * implicitly NULL -- confirm against the TX_* definitions.
 */
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
	zvol_replay_err,	/* 0 no such transaction type */
	zvol_replay_err,	/* TX_CREATE */
	zvol_replay_err,	/* TX_MKDIR */
	zvol_replay_err,	/* TX_MKXATTR */
	zvol_replay_err,	/* TX_SYMLINK */
	zvol_replay_err,	/* TX_REMOVE */
	zvol_replay_err,	/* TX_RMDIR */
	zvol_replay_err,	/* TX_LINK */
	zvol_replay_err,	/* TX_RENAME */
	zvol_replay_write,	/* TX_WRITE */
	zvol_replay_err,	/* TX_TRUNCATE */
	zvol_replay_err,	/* TX_SETATTR */
	zvol_replay_err,	/* TX_ACL */
};
|
421 |
||
422 |
/* |
|
6423 | 423 |
* Create a minor node (plus a whole lot more) for the specified volume. |
789 | 424 |
*/ |
425 |
int |
|
4787 | 426 |
zvol_create_minor(const char *name, major_t maj) |
789 | 427 |
{ |
428 |
zvol_state_t *zv; |
|
429 |
objset_t *os; |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
430 |
dmu_object_info_t doi; |
789 | 431 |
uint64_t volsize; |
432 |
minor_t minor = 0; |
|
433 |
struct pathname linkpath; |
|
6689
47572a2f5e73
6610506 Eliminate or improve retry logic from callers of dmu_objset_open()
maybee
parents:
6423
diff
changeset
|
434 |
int ds_mode = DS_MODE_OWNER; |
789 | 435 |
vnode_t *vp = NULL; |
436 |
char *devpath; |
|
6423 | 437 |
size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(name) + 1; |
789 | 438 |
char chrbuf[30], blkbuf[30]; |
439 |
int error; |
|
440 |
||
441 |
mutex_enter(&zvol_state_lock); |
|
442 |
||
443 |
if ((zv = zvol_minor_lookup(name)) != NULL) { |
|
444 |
mutex_exit(&zvol_state_lock); |
|
445 |
return (EEXIST); |
|
446 |
} |
|
447 |
||
448 |
if (strchr(name, '@') != 0) |
|
449 |
ds_mode |= DS_MODE_READONLY; |
|
450 |
||
451 |
error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); |
|
452 |
||
453 |
if (error) { |
|
454 |
mutex_exit(&zvol_state_lock); |
|
455 |
return (error); |
|
456 |
} |
|
457 |
||
458 |
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); |
|
459 |
||
460 |
if (error) { |
|
461 |
dmu_objset_close(os); |
|
462 |
mutex_exit(&zvol_state_lock); |
|
463 |
return (error); |
|
464 |
} |
|
465 |
||
466 |
/* |
|
467 |
* If there's an existing /dev/zvol symlink, try to use the |
|
468 |
* same minor number we used last time. |
|
469 |
*/ |
|
470 |
devpath = kmem_alloc(devpathlen, KM_SLEEP); |
|
471 |
||
6423 | 472 |
(void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, name); |
789 | 473 |
|
474 |
error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp); |
|
475 |
||
476 |
kmem_free(devpath, devpathlen); |
|
477 |
||
478 |
if (error == 0 && vp->v_type != VLNK) |
|
479 |
error = EINVAL; |
|
480 |
||
481 |
if (error == 0) { |
|
482 |
pn_alloc(&linkpath); |
|
483 |
error = pn_getsymlink(vp, &linkpath, kcred); |
|
484 |
if (error == 0) { |
|
485 |
char *ms = strstr(linkpath.pn_path, ZVOL_PSEUDO_DEV); |
|
486 |
if (ms != NULL) { |
|
487 |
ms += strlen(ZVOL_PSEUDO_DEV); |
|
488 |
minor = stoi(&ms); |
|
489 |
} |
|
490 |
} |
|
491 |
pn_free(&linkpath); |
|
492 |
} |
|
493 |
||
494 |
if (vp != NULL) |
|
495 |
VN_RELE(vp); |
|
496 |
||
497 |
/* |
|
498 |
* If we found a minor but it's already in use, we must pick a new one. |
|
499 |
*/ |
|
500 |
if (minor != 0 && ddi_get_soft_state(zvol_state, minor) != NULL) |
|
501 |
minor = 0; |
|
502 |
||
503 |
if (minor == 0) |
|
504 |
minor = zvol_minor_alloc(); |
|
505 |
||
506 |
if (minor == 0) { |
|
507 |
dmu_objset_close(os); |
|
508 |
mutex_exit(&zvol_state_lock); |
|
509 |
return (ENXIO); |
|
510 |
} |
|
511 |
||
512 |
if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { |
|
513 |
dmu_objset_close(os); |
|
514 |
mutex_exit(&zvol_state_lock); |
|
515 |
return (EAGAIN); |
|
516 |
} |
|
517 |
||
2676 | 518 |
(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, |
519 |
(char *)name); |
|
789 | 520 |
|
521 |
(void) sprintf(chrbuf, "%uc,raw", minor); |
|
522 |
||
523 |
if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, |
|
524 |
minor, DDI_PSEUDO, 0) == DDI_FAILURE) { |
|
525 |
ddi_soft_state_free(zvol_state, minor); |
|
526 |
dmu_objset_close(os); |
|
527 |
mutex_exit(&zvol_state_lock); |
|
528 |
return (EAGAIN); |
|
529 |
} |
|
530 |
||
531 |
(void) sprintf(blkbuf, "%uc", minor); |
|
532 |
||
533 |
if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, |
|
534 |
minor, DDI_PSEUDO, 0) == DDI_FAILURE) { |
|
535 |
ddi_remove_minor_node(zfs_dip, chrbuf); |
|
536 |
ddi_soft_state_free(zvol_state, minor); |
|
537 |
dmu_objset_close(os); |
|
538 |
mutex_exit(&zvol_state_lock); |
|
539 |
return (EAGAIN); |
|
540 |
} |
|
541 |
||
542 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
543 |
||
544 |
(void) strcpy(zv->zv_name, name); |
|
545 |
zv->zv_min_bs = DEV_BSHIFT; |
|
546 |
zv->zv_minor = minor; |
|
547 |
zv->zv_volsize = volsize; |
|
548 |
zv->zv_objset = os; |
|
549 |
zv->zv_mode = ds_mode; |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
550 |
zv->zv_zilog = zil_open(os, zvol_get_data); |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
551 |
mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
552 |
avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
553 |
sizeof (rl_t), offsetof(rl_t, r_node)); |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
554 |
list_create(&zv->zv_extents, sizeof (zvol_extent_t), |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
555 |
offsetof(zvol_extent_t, ze_node)); |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
556 |
/* get and cache the blocksize */ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
557 |
error = dmu_object_info(os, ZVOL_OBJ, &doi); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
558 |
ASSERT(error == 0); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
559 |
zv->zv_volblocksize = doi.doi_data_block_size; |
1141 | 560 |
|
8227 | 561 |
zil_replay(os, zv, zvol_replay_vector); |
4787 | 562 |
zvol_size_changed(zv, maj); |
789 | 563 |
|
1544 | 564 |
/* XXX this should handle the possible i/o error */ |
789 | 565 |
VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), |
566 |
"readonly", zvol_readonly_changed_cb, zv) == 0); |
|
567 |
||
568 |
zvol_minors++; |
|
569 |
||
570 |
mutex_exit(&zvol_state_lock); |
|
571 |
||
572 |
return (0); |
|
573 |
} |
|
574 |
||
575 |
/* |
|
576 |
* Remove minor node for the specified volume. |
|
577 |
*/ |
|
578 |
int |
|
2676 | 579 |
zvol_remove_minor(const char *name) |
789 | 580 |
{ |
581 |
zvol_state_t *zv; |
|
582 |
char namebuf[30]; |
|
583 |
||
584 |
mutex_enter(&zvol_state_lock); |
|
585 |
||
2676 | 586 |
if ((zv = zvol_minor_lookup(name)) == NULL) { |
789 | 587 |
mutex_exit(&zvol_state_lock); |
588 |
return (ENXIO); |
|
589 |
} |
|
590 |
||
591 |
if (zv->zv_total_opens != 0) { |
|
592 |
mutex_exit(&zvol_state_lock); |
|
593 |
return (EBUSY); |
|
594 |
} |
|
595 |
||
596 |
(void) sprintf(namebuf, "%uc,raw", zv->zv_minor); |
|
597 |
ddi_remove_minor_node(zfs_dip, namebuf); |
|
598 |
||
599 |
(void) sprintf(namebuf, "%uc", zv->zv_minor); |
|
600 |
ddi_remove_minor_node(zfs_dip, namebuf); |
|
601 |
||
602 |
VERIFY(dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), |
|
603 |
"readonly", zvol_readonly_changed_cb, zv) == 0); |
|
604 |
||
1141 | 605 |
zil_close(zv->zv_zilog); |
606 |
zv->zv_zilog = NULL; |
|
789 | 607 |
dmu_objset_close(zv->zv_objset); |
608 |
zv->zv_objset = NULL; |
|
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
609 |
avl_destroy(&zv->zv_znode.z_range_avl); |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
610 |
mutex_destroy(&zv->zv_znode.z_range_lock); |
789 | 611 |
|
612 |
ddi_soft_state_free(zvol_state, zv->zv_minor); |
|
613 |
||
614 |
zvol_minors--; |
|
615 |
||
616 |
mutex_exit(&zvol_state_lock); |
|
617 |
||
618 |
return (0); |
|
619 |
} |
|
620 |
||
6423 | 621 |
int |
622 |
zvol_prealloc(zvol_state_t *zv) |
|
623 |
{ |
|
624 |
objset_t *os = zv->zv_objset; |
|
625 |
dmu_tx_t *tx; |
|
626 |
uint64_t refd, avail, usedobjs, availobjs; |
|
627 |
uint64_t resid = zv->zv_volsize; |
|
628 |
uint64_t off = 0; |
|
629 |
||
630 |
/* Check the space usage before attempting to allocate the space */ |
|
631 |
dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); |
|
632 |
if (avail < zv->zv_volsize) |
|
633 |
return (ENOSPC); |
|
634 |
||
635 |
/* Free old extents if they exist */ |
|
636 |
zvol_free_extents(zv); |
|
637 |
||
638 |
while (resid != 0) { |
|
639 |
int error; |
|
640 |
uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); |
|
641 |
||
642 |
tx = dmu_tx_create(os); |
|
643 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); |
|
644 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
645 |
if (error) { |
|
646 |
dmu_tx_abort(tx); |
|
6992 | 647 |
(void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); |
6423 | 648 |
return (error); |
649 |
} |
|
7872
40a9434212f6
6646775 Speed up the dumpifying process for zvols
Tim Haley <Tim.Haley@Sun.COM>
parents:
7837
diff
changeset
|
650 |
dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); |
6423 | 651 |
dmu_tx_commit(tx); |
652 |
off += bytes; |
|
653 |
resid -= bytes; |
|
654 |
} |
|
655 |
txg_wait_synced(dmu_objset_pool(os), 0); |
|
656 |
||
657 |
return (0); |
|
658 |
} |
|
659 |
||
660 |
int |
|
661 |
zvol_update_volsize(zvol_state_t *zv, major_t maj, uint64_t volsize) |
|
662 |
{ |
|
663 |
dmu_tx_t *tx; |
|
664 |
int error; |
|
665 |
||
666 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
667 |
||
668 |
tx = dmu_tx_create(zv->zv_objset); |
|
669 |
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); |
|
670 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
671 |
if (error) { |
|
672 |
dmu_tx_abort(tx); |
|
673 |
return (error); |
|
674 |
} |
|
675 |
||
676 |
error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, |
|
677 |
&volsize, tx); |
|
678 |
dmu_tx_commit(tx); |
|
679 |
||
680 |
if (error == 0) |
|
6992 | 681 |
error = dmu_free_long_range(zv->zv_objset, |
682 |
ZVOL_OBJ, volsize, DMU_OBJECT_END); |
|
6423 | 683 |
|
7265 | 684 |
/* |
685 |
* If we are using a faked-up state (zv_minor == 0) then don't |
|
686 |
* try to update the in-core zvol state. |
|
687 |
*/ |
|
688 |
if (error == 0 && zv->zv_minor) { |
|
6423 | 689 |
zv->zv_volsize = volsize; |
690 |
zvol_size_changed(zv, maj); |
|
691 |
} |
|
692 |
return (error); |
|
693 |
} |
|
694 |
||
789 | 695 |
int |
4787 | 696 |
zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) |
789 | 697 |
{ |
698 |
zvol_state_t *zv; |
|
699 |
int error; |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
700 |
dmu_object_info_t doi; |
6423 | 701 |
uint64_t old_volsize = 0ULL; |
7265 | 702 |
zvol_state_t state = { 0 }; |
789 | 703 |
|
704 |
mutex_enter(&zvol_state_lock); |
|
705 |
||
2676 | 706 |
if ((zv = zvol_minor_lookup(name)) == NULL) { |
7265 | 707 |
/* |
708 |
* If we are doing a "zfs clone -o volsize=", then the |
|
709 |
* minor node won't exist yet. |
|
710 |
*/ |
|
711 |
error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER, |
|
712 |
&state.zv_objset); |
|
713 |
if (error != 0) |
|
714 |
goto out; |
|
715 |
zv = &state; |
|
789 | 716 |
} |
6423 | 717 |
old_volsize = zv->zv_volsize; |
789 | 718 |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
719 |
if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || |
2676 | 720 |
(error = zvol_check_volsize(volsize, |
7265 | 721 |
doi.doi_data_block_size)) != 0) |
722 |
goto out; |
|
1133
335d069294d1
6357470 vdev_raidz.c has unused RAIDZ_SINGLE define, code
eschrock
parents:
849
diff
changeset
|
723 |
|
6423 | 724 |
if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { |
7265 | 725 |
error = EROFS; |
726 |
goto out; |
|
789 | 727 |
} |
728 |
||
6423 | 729 |
error = zvol_update_volsize(zv, maj, volsize); |
789 | 730 |
|
6423 | 731 |
/* |
732 |
* Reinitialize the dump area to the new size. If we |
|
733 |
* failed to resize the dump area then restore the it back to |
|
734 |
* it's original size. |
|
735 |
*/ |
|
736 |
if (error == 0 && zv->zv_flags & ZVOL_DUMPIFIED) { |
|
737 |
if ((error = zvol_dumpify(zv)) != 0 || |
|
738 |
(error = dumpvp_resize()) != 0) { |
|
739 |
(void) zvol_update_volsize(zv, maj, old_volsize); |
|
740 |
error = zvol_dumpify(zv); |
|
741 |
} |
|
789 | 742 |
} |
743 |
||
7265 | 744 |
out: |
745 |
if (state.zv_objset) |
|
746 |
dmu_objset_close(state.zv_objset); |
|
747 |
||
789 | 748 |
mutex_exit(&zvol_state_lock); |
749 |
||
750 |
return (error); |
|
751 |
} |
|
752 |
||
753 |
int |
|
2676 | 754 |
zvol_set_volblocksize(const char *name, uint64_t volblocksize) |
789 | 755 |
{ |
756 |
zvol_state_t *zv; |
|
757 |
dmu_tx_t *tx; |
|
758 |
int error; |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
759 |
boolean_t needlock; |
789 | 760 |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
761 |
/* |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
762 |
* The lock may already be held if we are being called from |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
763 |
* zvol_dump_init(). |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
764 |
*/ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
765 |
needlock = !MUTEX_HELD(&zvol_state_lock); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
766 |
if (needlock) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
767 |
mutex_enter(&zvol_state_lock); |
789 | 768 |
|
2676 | 769 |
if ((zv = zvol_minor_lookup(name)) == NULL) { |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
770 |
if (needlock) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
771 |
mutex_exit(&zvol_state_lock); |
789 | 772 |
return (ENXIO); |
773 |
} |
|
6423 | 774 |
if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
775 |
if (needlock) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
776 |
mutex_exit(&zvol_state_lock); |
789 | 777 |
return (EROFS); |
778 |
} |
|
779 |
||
780 |
tx = dmu_tx_create(zv->zv_objset); |
|
781 |
dmu_tx_hold_bonus(tx, ZVOL_OBJ); |
|
782 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
783 |
if (error) { |
|
784 |
dmu_tx_abort(tx); |
|
785 |
} else { |
|
786 |
error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ, |
|
2676 | 787 |
volblocksize, 0, tx); |
789 | 788 |
if (error == ENOTSUP) |
789 |
error = EBUSY; |
|
790 |
dmu_tx_commit(tx); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
791 |
if (error == 0) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
792 |
zv->zv_volblocksize = volblocksize; |
789 | 793 |
} |
794 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
795 |
if (needlock) |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
796 |
mutex_exit(&zvol_state_lock); |
789 | 797 |
|
798 |
return (error); |
|
799 |
} |
|
800 |
||
801 |
/*ARGSUSED*/ |
|
802 |
int |
|
803 |
zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) |
|
804 |
{ |
|
805 |
minor_t minor = getminor(*devp); |
|
806 |
zvol_state_t *zv; |
|
807 |
||
808 |
if (minor == 0) /* This is the control device */ |
|
809 |
return (0); |
|
810 |
||
811 |
mutex_enter(&zvol_state_lock); |
|
812 |
||
813 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
814 |
if (zv == NULL) { |
|
815 |
mutex_exit(&zvol_state_lock); |
|
816 |
return (ENXIO); |
|
817 |
} |
|
818 |
||
819 |
ASSERT(zv->zv_objset != NULL); |
|
820 |
||
821 |
if ((flag & FWRITE) && |
|
6423 | 822 |
(zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) { |
789 | 823 |
mutex_exit(&zvol_state_lock); |
824 |
return (EROFS); |
|
825 |
} |
|
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
826 |
if (zv->zv_flags & ZVOL_EXCL) { |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
827 |
mutex_exit(&zvol_state_lock); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
828 |
return (EBUSY); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
829 |
} |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
830 |
if (flag & FEXCL) { |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
831 |
if (zv->zv_total_opens != 0) { |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
832 |
mutex_exit(&zvol_state_lock); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
833 |
return (EBUSY); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
834 |
} |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
835 |
zv->zv_flags |= ZVOL_EXCL; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
836 |
} |
789 | 837 |
|
838 |
if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { |
|
839 |
zv->zv_open_count[otyp]++; |
|
840 |
zv->zv_total_opens++; |
|
841 |
} |
|
842 |
||
843 |
mutex_exit(&zvol_state_lock); |
|
844 |
||
845 |
return (0); |
|
846 |
} |
|
847 |
||
848 |
/*ARGSUSED*/ |
|
849 |
int |
|
850 |
zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) |
|
851 |
{ |
|
852 |
minor_t minor = getminor(dev); |
|
853 |
zvol_state_t *zv; |
|
854 |
||
855 |
if (minor == 0) /* This is the control device */ |
|
856 |
return (0); |
|
857 |
||
858 |
mutex_enter(&zvol_state_lock); |
|
859 |
||
860 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
861 |
if (zv == NULL) { |
|
862 |
mutex_exit(&zvol_state_lock); |
|
863 |
return (ENXIO); |
|
864 |
} |
|
865 |
||
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
866 |
if (zv->zv_flags & ZVOL_EXCL) { |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
867 |
ASSERT(zv->zv_total_opens == 1); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
868 |
zv->zv_flags &= ~ZVOL_EXCL; |
789 | 869 |
} |
870 |
||
871 |
/* |
|
872 |
* If the open count is zero, this is a spurious close. |
|
873 |
* That indicates a bug in the kernel / DDI framework. |
|
874 |
*/ |
|
875 |
ASSERT(zv->zv_open_count[otyp] != 0); |
|
876 |
ASSERT(zv->zv_total_opens != 0); |
|
877 |
||
878 |
/* |
|
879 |
* You may get multiple opens, but only one close. |
|
880 |
*/ |
|
881 |
zv->zv_open_count[otyp]--; |
|
882 |
zv->zv_total_opens--; |
|
883 |
||
884 |
mutex_exit(&zvol_state_lock); |
|
885 |
||
886 |
return (0); |
|
887 |
} |
|
888 |
||
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
889 |
static void |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
890 |
zvol_get_done(dmu_buf_t *db, void *vzgd) |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
891 |
{ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
892 |
zgd_t *zgd = (zgd_t *)vzgd; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
893 |
rl_t *rl = zgd->zgd_rl; |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
894 |
|
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
895 |
dmu_buf_rele(db, vzgd); |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
896 |
zfs_range_unlock(rl); |
5688
c0b02c8fd2c0
6640580 spa_get_random() is insanely slow in userland
bonwick
parents:
5331
diff
changeset
|
897 |
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
898 |
kmem_free(zgd, sizeof (zgd_t)); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
899 |
} |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
900 |
|
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
901 |
/* |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
902 |
* Get data to generate a TX_WRITE intent log record. |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
903 |
*/ |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
904 |
static int |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
905 |
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
906 |
{ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
907 |
zvol_state_t *zv = arg; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
908 |
objset_t *os = zv->zv_objset; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
909 |
dmu_buf_t *db; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
910 |
rl_t *rl; |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
911 |
zgd_t *zgd; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
912 |
uint64_t boff; /* block starting offset */ |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
913 |
int dlen = lr->lr_length; /* length of user data */ |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
914 |
int error; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
915 |
|
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
916 |
ASSERT(zio); |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
917 |
ASSERT(dlen != 0); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
918 |
|
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
919 |
/* |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
920 |
* Write records come in two flavors: immediate and indirect. |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
921 |
* For small writes it's cheaper to store the data with the |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
922 |
* log record (immediate); for large writes it's cheaper to |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
923 |
* sync the data and get a pointer to it (indirect) so that |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
924 |
* we don't have to write the data twice. |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
925 |
*/ |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
926 |
if (buf != NULL) /* immediate write */ |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
927 |
return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf)); |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
928 |
|
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
929 |
zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
930 |
zgd->zgd_zilog = zv->zv_zilog; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
931 |
zgd->zgd_bp = &lr->lr_blkptr; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
932 |
|
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
933 |
/* |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
934 |
* Lock the range of the block to ensure that when the data is |
6423 | 935 |
* written out and its checksum is being calculated that no other |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
936 |
* thread can change the block. |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
937 |
*/ |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
938 |
boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
939 |
rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
940 |
RL_READER); |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
941 |
zgd->zgd_rl = rl; |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
942 |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
943 |
VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db)); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
944 |
error = dmu_sync(zio, db, &lr->lr_blkptr, |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
945 |
lr->lr_common.lrc_txg, zvol_get_done, zgd); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
946 |
if (error == 0) |
5688
c0b02c8fd2c0
6640580 spa_get_random() is insanely slow in userland
bonwick
parents:
5331
diff
changeset
|
947 |
zil_add_block(zv->zv_zilog, &lr->lr_blkptr); |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
948 |
/* |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
949 |
* If we get EINPROGRESS, then we need to wait for a |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
950 |
* write IO initiated by dmu_sync() to complete before |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
951 |
* we can release this dbuf. We will finish everything |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
952 |
* up in the zvol_get_done() callback. |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
953 |
*/ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
954 |
if (error == EINPROGRESS) |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
955 |
return (0); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
956 |
dmu_buf_rele(db, zgd); |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
957 |
zfs_range_unlock(rl); |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
958 |
kmem_free(zgd, sizeof (zgd_t)); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
959 |
return (error); |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
960 |
} |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
961 |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
962 |
/* |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
963 |
* zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. |
1141 | 964 |
* |
965 |
* We store data in the log buffers if it's small enough. |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
966 |
* Otherwise we will later flush the data out via dmu_sync(). |
1141 | 967 |
*/ |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
968 |
ssize_t zvol_immediate_write_sz = 32768; |
1141 | 969 |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
970 |
static void |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
971 |
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) |
1141 | 972 |
{ |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
973 |
uint32_t blocksize = zv->zv_volblocksize; |
8227 | 974 |
zilog_t *zilog = zv->zv_zilog; |
1141 | 975 |
lr_write_t *lr; |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
976 |
|
8227 | 977 |
if (zilog->zl_replay) { |
978 |
dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); |
|
979 |
zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = |
|
980 |
zilog->zl_replaying_seq; |
|
981 |
return; |
|
982 |
} |
|
983 |
||
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
984 |
while (len) { |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
985 |
ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
986 |
itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
987 |
|
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
988 |
itx->itx_wr_state = |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
989 |
len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
990 |
itx->itx_private = zv; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
991 |
lr = (lr_write_t *)&itx->itx_lr; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
992 |
lr->lr_foid = ZVOL_OBJ; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
993 |
lr->lr_offset = off; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
994 |
lr->lr_length = nbytes; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
995 |
lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
996 |
BP_ZERO(&lr->lr_blkptr); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
997 |
|
8227 | 998 |
(void) zil_itx_assign(zilog, itx, tx); |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
999 |
len -= nbytes; |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1000 |
off += nbytes; |
1141 | 1001 |
} |
1002 |
} |
|
1003 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1004 |
static int |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1005 |
zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1006 |
boolean_t doread, boolean_t isdump) |
6423 | 1007 |
{ |
1008 |
vdev_disk_t *dvd; |
|
1009 |
int c; |
|
1010 |
int numerrors = 0; |
|
1011 |
||
1012 |
for (c = 0; c < vd->vdev_children; c++) { |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1013 |
ASSERT(vd->vdev_ops == &vdev_mirror_ops); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1014 |
int err = zvol_dumpio_vdev(vd->vdev_child[c], |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1015 |
addr, offset, size, doread, isdump); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1016 |
if (err != 0) { |
6423 | 1017 |
numerrors++; |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1018 |
} else if (doread) { |
6423 | 1019 |
break; |
1020 |
} |
|
1021 |
} |
|
1022 |
||
1023 |
if (!vd->vdev_ops->vdev_op_leaf) |
|
1024 |
return (numerrors < vd->vdev_children ? 0 : EIO); |
|
1025 |
||
7903
4c8fa38f91ec
6760985 assertion failure with dump device on pool with non-writeable vdev
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7872
diff
changeset
|
1026 |
if (doread && !vdev_readable(vd)) |
4c8fa38f91ec
6760985 assertion failure with dump device on pool with non-writeable vdev
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7872
diff
changeset
|
1027 |
return (EIO); |
4c8fa38f91ec
6760985 assertion failure with dump device on pool with non-writeable vdev
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7872
diff
changeset
|
1028 |
else if (!doread && !vdev_writeable(vd)) |
6423 | 1029 |
return (EIO); |
1030 |
||
1031 |
dvd = vd->vdev_tsd; |
|
1032 |
ASSERT3P(dvd, !=, NULL); |
|
1033 |
offset += VDEV_LABEL_START_SIZE; |
|
1034 |
||
1035 |
if (ddi_in_panic() || isdump) { |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1036 |
ASSERT(!doread); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1037 |
if (doread) |
6423 | 1038 |
return (EIO); |
1039 |
return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), |
|
1040 |
lbtodb(size))); |
|
1041 |
} else { |
|
1042 |
return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1043 |
doread ? B_READ : B_WRITE)); |
6423 | 1044 |
} |
1045 |
} |
|
1046 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1047 |
static int |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1048 |
zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1049 |
boolean_t doread, boolean_t isdump) |
6423 | 1050 |
{ |
1051 |
vdev_t *vd; |
|
1052 |
int error; |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1053 |
zvol_extent_t *ze; |
6423 | 1054 |
spa_t *spa = dmu_objset_spa(zv->zv_objset); |
1055 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1056 |
/* Must be sector aligned, and not stradle a block boundary. */ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1057 |
if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1058 |
P2BOUNDARY(offset, size, zv->zv_volblocksize)) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1059 |
return (EINVAL); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1060 |
} |
6423 | 1061 |
ASSERT(size <= zv->zv_volblocksize); |
1062 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1063 |
/* Locate the extent this belongs to */ |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1064 |
ze = list_head(&zv->zv_extents); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1065 |
while (offset >= ze->ze_nblks * zv->zv_volblocksize) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1066 |
offset -= ze->ze_nblks * zv->zv_volblocksize; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1067 |
ze = list_next(&zv->zv_extents, ze); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1068 |
} |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7638
diff
changeset
|
1069 |
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1070 |
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1071 |
offset += DVA_GET_OFFSET(&ze->ze_dva); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1072 |
error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7638
diff
changeset
|
1073 |
spa_config_exit(spa, SCL_STATE, FTAG); |
6423 | 1074 |
return (error); |
1075 |
} |
|
1076 |
||
1077 |
int |
|
789 | 1078 |
zvol_strategy(buf_t *bp) |
1079 |
{ |
|
1080 |
zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); |
|
1081 |
uint64_t off, volsize; |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1082 |
size_t resid; |
789 | 1083 |
char *addr; |
1141 | 1084 |
objset_t *os; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1085 |
rl_t *rl; |
789 | 1086 |
int error = 0; |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1087 |
boolean_t doread = bp->b_flags & B_READ; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1088 |
boolean_t is_dump = zv->zv_flags & ZVOL_DUMPIFIED; |
789 | 1089 |
|
1090 |
if (zv == NULL) { |
|
1091 |
bioerror(bp, ENXIO); |
|
1092 |
biodone(bp); |
|
1093 |
return (0); |
|
1094 |
} |
|
1095 |
||
1096 |
if (getminor(bp->b_edev) == 0) { |
|
1097 |
bioerror(bp, EINVAL); |
|
1098 |
biodone(bp); |
|
1099 |
return (0); |
|
1100 |
} |
|
1101 |
||
6423 | 1102 |
if (!(bp->b_flags & B_READ) && |
1103 |
(zv->zv_flags & ZVOL_RDONLY || |
|
1104 |
zv->zv_mode & DS_MODE_READONLY)) { |
|
789 | 1105 |
bioerror(bp, EROFS); |
1106 |
biodone(bp); |
|
1107 |
return (0); |
|
1108 |
} |
|
1109 |
||
1110 |
off = ldbtob(bp->b_blkno); |
|
1111 |
volsize = zv->zv_volsize; |
|
1112 |
||
1141 | 1113 |
os = zv->zv_objset; |
1114 |
ASSERT(os != NULL); |
|
789 | 1115 |
|
1116 |
bp_mapin(bp); |
|
1117 |
addr = bp->b_un.b_addr; |
|
1118 |
resid = bp->b_bcount; |
|
1119 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1120 |
if (resid > 0 && (off < 0 || off >= volsize)) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1121 |
bioerror(bp, EIO); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1122 |
biodone(bp); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1123 |
return (0); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1124 |
} |
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1125 |
|
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1126 |
/* |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1127 |
* There must be no buffer changes when doing a dmu_sync() because |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1128 |
* we can't change the data whilst calculating the checksum. |
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1129 |
*/ |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1130 |
rl = zfs_range_lock(&zv->zv_znode, off, resid, |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1131 |
doread ? RL_READER : RL_WRITER); |
6423 | 1132 |
|
789 | 1133 |
while (resid != 0 && off < volsize) { |
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1134 |
size_t size = MIN(resid, zvol_maxphys); |
6423 | 1135 |
if (is_dump) { |
1136 |
size = MIN(size, P2END(off, zv->zv_volblocksize) - off); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1137 |
error = zvol_dumpio(zv, addr, off, size, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1138 |
doread, B_FALSE); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1139 |
} else if (doread) { |
1861
7929434f26fb
6413125 zvol_strategy() race can lead to on-disk corruption.
perrin
parents:
1669
diff
changeset
|
1140 |
error = dmu_read(os, ZVOL_OBJ, off, size, addr); |
789 | 1141 |
} else { |
1141 | 1142 |
dmu_tx_t *tx = dmu_tx_create(os); |
789 | 1143 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); |
1144 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
1145 |
if (error) { |
|
1146 |
dmu_tx_abort(tx); |
|
1147 |
} else { |
|
1141 | 1148 |
dmu_write(os, ZVOL_OBJ, off, size, addr, tx); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1149 |
zvol_log_write(zv, tx, off, size); |
789 | 1150 |
dmu_tx_commit(tx); |
1151 |
} |
|
1152 |
} |
|
7294 | 1153 |
if (error) { |
1154 |
/* convert checksum errors into IO errors */ |
|
1155 |
if (error == ECKSUM) |
|
1156 |
error = EIO; |
|
789 | 1157 |
break; |
7294 | 1158 |
} |
789 | 1159 |
off += size; |
1160 |
addr += size; |
|
1161 |
resid -= size; |
|
1162 |
} |
|
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1163 |
zfs_range_unlock(rl); |
789 | 1164 |
|
1165 |
if ((bp->b_resid = resid) == bp->b_bcount) |
|
1166 |
bioerror(bp, off > volsize ? EINVAL : error); |
|
1167 |
||
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1168 |
if (!(bp->b_flags & B_ASYNC) && !doread && !zil_disable && !is_dump) |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1169 |
zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1170 |
biodone(bp); |
1141 | 1171 |
|
789 | 1172 |
return (0); |
1173 |
} |
|
1174 |
||
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1175 |
/* |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1176 |
* Set the buffer count to the zvol maximum transfer. |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1177 |
* Using our own routine instead of the default minphys() |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1178 |
* means that for larger writes we write bigger buffers on X86 |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1179 |
* (128K instead of 56K) and flush the disk write cache less often |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1180 |
* (every zvol_maxphys - currently 1MB) instead of minphys (currently |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1181 |
* 56K on X86 and 128K on sparc). |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1182 |
*/ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1183 |
void |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1184 |
zvol_minphys(struct buf *bp) |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1185 |
{ |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1186 |
if (bp->b_bcount > zvol_maxphys) |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1187 |
bp->b_bcount = zvol_maxphys; |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1188 |
} |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
3016
diff
changeset
|
1189 |
|
6423 | 1190 |
int |
1191 |
zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) |
|
1192 |
{ |
|
1193 |
minor_t minor = getminor(dev); |
|
1194 |
zvol_state_t *zv; |
|
1195 |
int error = 0; |
|
1196 |
uint64_t size; |
|
1197 |
uint64_t boff; |
|
1198 |
uint64_t resid; |
|
1199 |
||
1200 |
if (minor == 0) /* This is the control device */ |
|
1201 |
return (ENXIO); |
|
1202 |
||
1203 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
1204 |
if (zv == NULL) |
|
1205 |
return (ENXIO); |
|
1206 |
||
1207 |
boff = ldbtob(blkno); |
|
1208 |
resid = ldbtob(nblocks); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1209 |
|
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1210 |
VERIFY3U(boff + resid, <=, zv->zv_volsize); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1211 |
|
6423 | 1212 |
while (resid) { |
1213 |
size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1214 |
error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); |
6423 | 1215 |
if (error) |
1216 |
break; |
|
1217 |
boff += size; |
|
1218 |
addr += size; |
|
1219 |
resid -= size; |
|
1220 |
} |
|
1221 |
||
1222 |
return (error); |
|
1223 |
} |
|
1224 |
||
789 | 1225 |
/*ARGSUSED*/ |
1226 |
int |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1227 |
zvol_read(dev_t dev, uio_t *uio, cred_t *cr) |
789 | 1228 |
{ |
4107 | 1229 |
minor_t minor = getminor(dev); |
1230 |
zvol_state_t *zv; |
|
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1231 |
uint64_t volsize; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1232 |
rl_t *rl; |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1233 |
int error = 0; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1234 |
|
4107 | 1235 |
if (minor == 0) /* This is the control device */ |
1236 |
return (ENXIO); |
|
1237 |
||
1238 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
1239 |
if (zv == NULL) |
|
1240 |
return (ENXIO); |
|
1241 |
||
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1242 |
volsize = zv->zv_volsize; |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1243 |
if (uio->uio_resid > 0 && |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1244 |
(uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1245 |
return (EIO); |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1246 |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1247 |
if (zv->zv_flags & ZVOL_DUMPIFIED) { |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1248 |
error = physio(zvol_strategy, NULL, dev, B_READ, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1249 |
zvol_minphys, uio); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1250 |
return (error); |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1251 |
} |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1252 |
|
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1253 |
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1254 |
RL_READER); |
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1255 |
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1256 |
uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1257 |
|
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1258 |
/* don't read past the end */ |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1259 |
if (bytes > volsize - uio->uio_loffset) |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1260 |
bytes = volsize - uio->uio_loffset; |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1261 |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1262 |
error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); |
7294 | 1263 |
if (error) { |
1264 |
/* convert checksum errors into IO errors */ |
|
1265 |
if (error == ECKSUM) |
|
1266 |
error = EIO; |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1267 |
break; |
7294 | 1268 |
} |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1269 |
} |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1270 |
zfs_range_unlock(rl); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1271 |
return (error); |
789 | 1272 |
} |
1273 |
||
1274 |
/*ARGSUSED*/ |
|
1275 |
int |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1276 |
zvol_write(dev_t dev, uio_t *uio, cred_t *cr) |
789 | 1277 |
{ |
4107 | 1278 |
minor_t minor = getminor(dev); |
1279 |
zvol_state_t *zv; |
|
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1280 |
uint64_t volsize; |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1281 |
rl_t *rl; |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1282 |
int error = 0; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1283 |
|
4107 | 1284 |
if (minor == 0) /* This is the control device */ |
1285 |
return (ENXIO); |
|
1286 |
||
1287 |
zv = ddi_get_soft_state(zvol_state, minor); |
|
1288 |
if (zv == NULL) |
|
1289 |
return (ENXIO); |
|
1290 |
||
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1291 |
volsize = zv->zv_volsize; |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1292 |
if (uio->uio_resid > 0 && |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1293 |
(uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1294 |
return (EIO); |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1295 |
|
6423 | 1296 |
if (zv->zv_flags & ZVOL_DUMPIFIED) { |
1297 |
error = physio(zvol_strategy, NULL, dev, B_WRITE, |
|
1298 |
zvol_minphys, uio); |
|
1299 |
return (error); |
|
1300 |
} |
|
1301 |
||
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1302 |
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, |
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1303 |
RL_WRITER); |
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1304 |
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1305 |
uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1306 |
uint64_t off = uio->uio_loffset; |
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1307 |
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); |
789 | 1308 |
|
7013
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1309 |
if (bytes > volsize - off) /* don't write past the end */ |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1310 |
bytes = volsize - off; |
540c400de3b4
6596419 zvol character (raw) devices allow read/write past the end of the device
gw25295
parents:
6992
diff
changeset
|
1311 |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1312 |
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1313 |
error = dmu_tx_assign(tx, TXG_WAIT); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1314 |
if (error) { |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1315 |
dmu_tx_abort(tx); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1316 |
break; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1317 |
} |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1318 |
error = dmu_write_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes, tx); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1319 |
if (error == 0) |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1320 |
zvol_log_write(zv, tx, off, bytes); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1321 |
dmu_tx_commit(tx); |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1322 |
|
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1323 |
if (error) |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1324 |
break; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1325 |
} |
3755
8708c35cb823
6525008 panic: dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC, file: ../../common/fs/zfs/dbuf.c, line: 676
perrin
parents:
3638
diff
changeset
|
1326 |
zfs_range_unlock(rl); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1327 |
return (error); |
789 | 1328 |
} |
1329 |
||
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1330 |
int |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1331 |
zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1332 |
{ |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1333 |
struct uuid uuid = EFI_RESERVED; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1334 |
efi_gpe_t gpe = { 0 }; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1335 |
uint32_t crc; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1336 |
dk_efi_t efi; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1337 |
int length; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1338 |
char *ptr; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1339 |
|
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1340 |
if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1341 |
return (EFAULT); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1342 |
ptr = (char *)(uintptr_t)efi.dki_data_64; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1343 |
length = efi.dki_length; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1344 |
/* |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1345 |
* Some clients may attempt to request a PMBR for the |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1346 |
* zvol. Currently this interface will return EINVAL to |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1347 |
* such requests. These requests could be supported by |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1348 |
* adding a check for lba == 0 and consing up an appropriate |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1349 |
* PMBR. |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1350 |
*/ |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1351 |
if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1352 |
return (EINVAL); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1353 |
|
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1354 |
gpe.efi_gpe_StartingLBA = LE_64(34ULL); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1355 |
gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1356 |
UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1357 |
|
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1358 |
if (efi.dki_lba == 1) { |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1359 |
efi_gpt_t gpt = { 0 }; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1360 |
|
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1361 |
gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1362 |
gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1363 |
gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1364 |
gpt.efi_gpt_MyLBA = LE_64(1ULL); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1365 |
gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1366 |
gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1367 |
gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1368 |
gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1369 |
gpt.efi_gpt_SizeOfPartitionEntry = |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1370 |
LE_32(sizeof (efi_gpe_t)); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1371 |
CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1372 |
gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1373 |
CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1374 |
gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1375 |
if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1376 |
flag)) |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1377 |
return (EFAULT); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1378 |
ptr += sizeof (gpt); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1379 |
length -= sizeof (gpt); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1380 |
} |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1381 |
if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1382 |
length), flag)) |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1383 |
return (EFAULT); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1384 |
return (0); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1385 |
} |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1386 |
|
789 | 1387 |
/* |
1388 |
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). |
|
1389 |
*/ |
|
1390 |
/*ARGSUSED*/ |
|
1391 |
int |
|
1392 |
zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) |
|
1393 |
{ |
|
1394 |
zvol_state_t *zv; |
|
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1395 |
struct dk_cinfo dki; |
789 | 1396 |
struct dk_minfo dkm; |
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1397 |
struct dk_callback *dkc; |
789 | 1398 |
int error = 0; |
6423 | 1399 |
rl_t *rl; |
789 | 1400 |
|
1401 |
mutex_enter(&zvol_state_lock); |
|
1402 |
||
1403 |
zv = ddi_get_soft_state(zvol_state, getminor(dev)); |
|
1404 |
||
1405 |
if (zv == NULL) { |
|
1406 |
mutex_exit(&zvol_state_lock); |
|
1407 |
return (ENXIO); |
|
1408 |
} |
|
1409 |
||
1410 |
switch (cmd) { |
|
1411 |
||
1412 |
case DKIOCINFO: |
|
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1413 |
bzero(&dki, sizeof (dki)); |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1414 |
(void) strcpy(dki.dki_cname, "zvol"); |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1415 |
(void) strcpy(dki.dki_dname, "zvol"); |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1416 |
dki.dki_ctype = DKC_UNKNOWN; |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1417 |
dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); |
789 | 1418 |
mutex_exit(&zvol_state_lock); |
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1419 |
if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) |
789 | 1420 |
error = EFAULT; |
1421 |
return (error); |
|
1422 |
||
1423 |
case DKIOCGMEDIAINFO: |
|
1424 |
bzero(&dkm, sizeof (dkm)); |
|
1425 |
dkm.dki_lbsize = 1U << zv->zv_min_bs; |
|
1426 |
dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; |
|
1427 |
dkm.dki_media_type = DK_UNKNOWN; |
|
1428 |
mutex_exit(&zvol_state_lock); |
|
1429 |
if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) |
|
1430 |
error = EFAULT; |
|
1431 |
return (error); |
|
1432 |
||
1433 |
case DKIOCGETEFI: |
|
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1434 |
{ |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1435 |
uint64_t vs = zv->zv_volsize; |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1436 |
uint8_t bs = zv->zv_min_bs; |
3016 | 1437 |
|
1438 |
mutex_exit(&zvol_state_lock); |
|
7405
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1439 |
error = zvol_getefi((void *)arg, flag, vs, bs); |
22b4aeef8023
6612759 zvol DKIOCGETEFI should return GPT and GPE when possible
Eric Taylor <Eric.Taylor@Sun.COM>
parents:
7294
diff
changeset
|
1440 |
return (error); |
3016 | 1441 |
} |
789 | 1442 |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1443 |
case DKIOCFLUSHWRITECACHE: |
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1444 |
dkc = (struct dk_callback *)arg; |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1445 |
zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); |
3897
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1446 |
if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1447 |
(*dkc->dkc_callback)(dkc->dkc_cookie, error); |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1448 |
error = 0; |
278bade789ba
6437750 panic: db->db_buf==0||arc_referenced(db->db_buf), file: dbuf.c,line:1539
maybee
parents:
3755
diff
changeset
|
1449 |
} |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1450 |
break; |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
1451 |
|
3245
0c86ad4b2673
6493634 zvol should return ENOTSUP on DKIOCGVTOC ioctl
maybee
parents:
3080
diff
changeset
|
1452 |
case DKIOCGGEOM: |
0c86ad4b2673
6493634 zvol should return ENOTSUP on DKIOCGVTOC ioctl
maybee
parents:
3080
diff
changeset
|
1453 |
case DKIOCGVTOC: |
6423 | 1454 |
/* |
1455 |
* commands using these (like prtvtoc) expect ENOTSUP |
|
1456 |
* since we're emulating an EFI label |
|
1457 |
*/ |
|
3245
0c86ad4b2673
6493634 zvol should return ENOTSUP on DKIOCGVTOC ioctl
maybee
parents:
3080
diff
changeset
|
1458 |
error = ENOTSUP; |
0c86ad4b2673
6493634 zvol should return ENOTSUP on DKIOCGVTOC ioctl
maybee
parents:
3080
diff
changeset
|
1459 |
break; |
0c86ad4b2673
6493634 zvol should return ENOTSUP on DKIOCGVTOC ioctl
maybee
parents:
3080
diff
changeset
|
1460 |
|
6423 | 1461 |
case DKIOCDUMPINIT: |
1462 |
rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, |
|
1463 |
RL_WRITER); |
|
1464 |
error = zvol_dumpify(zv); |
|
1465 |
zfs_range_unlock(rl); |
|
1466 |
break; |
|
1467 |
||
1468 |
case DKIOCDUMPFINI: |
|
1469 |
rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, |
|
1470 |
RL_WRITER); |
|
1471 |
error = zvol_dump_fini(zv); |
|
1472 |
zfs_range_unlock(rl); |
|
1473 |
break; |
|
1474 |
||
789 | 1475 |
default: |
3016 | 1476 |
error = ENOTTY; |
789 | 1477 |
break; |
1478 |
||
1479 |
} |
|
1480 |
mutex_exit(&zvol_state_lock); |
|
1481 |
return (error); |
|
1482 |
} |
|
1483 |
||
1484 |
int |
|
1485 |
zvol_busy(void) |
|
1486 |
{ |
|
1487 |
return (zvol_minors != 0); |
|
1488 |
} |
|
1489 |
||
1490 |
/*
 * Set up the driver-wide state: the soft-state anchor zvol_state, sized
 * for zvol_state_t entries, and the zvol_state_lock mutex.  A failure to
 * initialize the soft state trips the VERIFY.
 */
void
zvol_init(void)
{
	VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0);
	mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
}
|
1496 |
||
1497 |
/*
 * Tear down the driver-wide state: destroy zvol_state_lock, then release
 * the zvol_state soft-state anchor.
 */
void
zvol_fini(void)
{
	mutex_destroy(&zvol_state_lock);
	ddi_soft_state_fini(&zvol_state);
}
|
6423 | 1503 |
|
1504 |
static boolean_t |
|
1505 |
zvol_is_swap(zvol_state_t *zv) |
|
1506 |
{ |
|
1507 |
vnode_t *vp; |
|
1508 |
boolean_t ret = B_FALSE; |
|
1509 |
char *devpath; |
|
1510 |
size_t devpathlen; |
|
1511 |
int error; |
|
1512 |
||
1513 |
devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(zv->zv_name) + 1; |
|
1514 |
devpath = kmem_alloc(devpathlen, KM_SLEEP); |
|
1515 |
(void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name); |
|
1516 |
error = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); |
|
1517 |
kmem_free(devpath, devpathlen); |
|
1518 |
||
1519 |
ret = !error && IS_SWAPVP(common_specvp(vp)); |
|
1520 |
||
1521 |
if (vp != NULL) |
|
1522 |
VN_RELE(vp); |
|
1523 |
||
1524 |
return (ret); |
|
1525 |
} |
|
1526 |
||
1527 |
static int |
|
1528 |
zvol_dump_init(zvol_state_t *zv, boolean_t resize) |
|
1529 |
{ |
|
1530 |
dmu_tx_t *tx; |
|
1531 |
int error = 0; |
|
1532 |
objset_t *os = zv->zv_objset; |
|
1533 |
nvlist_t *nv = NULL; |
|
1534 |
||
1535 |
ASSERT(MUTEX_HELD(&zvol_state_lock)); |
|
1536 |
||
1537 |
tx = dmu_tx_create(os); |
|
1538 |
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); |
|
1539 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
1540 |
if (error) { |
|
1541 |
dmu_tx_abort(tx); |
|
1542 |
return (error); |
|
1543 |
} |
|
1544 |
||
1545 |
/* |
|
1546 |
* If we are resizing the dump device then we only need to |
|
1547 |
* update the refreservation to match the newly updated |
|
1548 |
* zvolsize. Otherwise, we save off the original state of the |
|
1549 |
* zvol so that we can restore them if the zvol is ever undumpified. |
|
1550 |
*/ |
|
1551 |
if (resize) { |
|
1552 |
error = zap_update(os, ZVOL_ZAP_OBJ, |
|
1553 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, |
|
1554 |
&zv->zv_volsize, tx); |
|
1555 |
} else { |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1556 |
uint64_t checksum, compress, refresrv, vbs; |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1557 |
|
6423 | 1558 |
error = dsl_prop_get_integer(zv->zv_name, |
1559 |
zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); |
|
1560 |
error = error ? error : dsl_prop_get_integer(zv->zv_name, |
|
1561 |
zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); |
|
1562 |
error = error ? error : dsl_prop_get_integer(zv->zv_name, |
|
1563 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1564 |
error = error ? error : dsl_prop_get_integer(zv->zv_name, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1565 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); |
6423 | 1566 |
|
1567 |
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, |
|
1568 |
zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, |
|
1569 |
&compress, tx); |
|
1570 |
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, |
|
1571 |
zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); |
|
1572 |
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, |
|
1573 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, |
|
1574 |
&refresrv, tx); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1575 |
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1576 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1577 |
&vbs, tx); |
6423 | 1578 |
} |
1579 |
dmu_tx_commit(tx); |
|
1580 |
||
1581 |
/* Truncate the file */ |
|
1582 |
if (!error) |
|
6992 | 1583 |
error = dmu_free_long_range(zv->zv_objset, |
1584 |
ZVOL_OBJ, 0, DMU_OBJECT_END); |
|
6423 | 1585 |
|
1586 |
if (error) |
|
1587 |
return (error); |
|
1588 |
||
1589 |
/* |
|
1590 |
* We only need update the zvol's property if we are initializing |
|
1591 |
* the dump area for the first time. |
|
1592 |
*/ |
|
1593 |
if (!resize) { |
|
1594 |
VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); |
|
1595 |
VERIFY(nvlist_add_uint64(nv, |
|
1596 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); |
|
1597 |
VERIFY(nvlist_add_uint64(nv, |
|
1598 |
zfs_prop_to_name(ZFS_PROP_COMPRESSION), |
|
1599 |
ZIO_COMPRESS_OFF) == 0); |
|
1600 |
VERIFY(nvlist_add_uint64(nv, |
|
1601 |
zfs_prop_to_name(ZFS_PROP_CHECKSUM), |
|
1602 |
ZIO_CHECKSUM_OFF) == 0); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1603 |
VERIFY(nvlist_add_uint64(nv, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1604 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1605 |
SPA_MAXBLOCKSIZE) == 0); |
6423 | 1606 |
|
1607 |
error = zfs_set_prop_nvlist(zv->zv_name, nv); |
|
1608 |
nvlist_free(nv); |
|
1609 |
||
1610 |
if (error) |
|
1611 |
return (error); |
|
1612 |
} |
|
1613 |
||
1614 |
/* Allocate the space for the dump */ |
|
1615 |
error = zvol_prealloc(zv); |
|
1616 |
return (error); |
|
1617 |
} |
|
1618 |
||
1619 |
static int |
|
1620 |
zvol_dumpify(zvol_state_t *zv) |
|
1621 |
{ |
|
1622 |
int error = 0; |
|
1623 |
uint64_t dumpsize = 0; |
|
1624 |
dmu_tx_t *tx; |
|
1625 |
objset_t *os = zv->zv_objset; |
|
1626 |
||
1627 |
if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) |
|
1628 |
return (EROFS); |
|
1629 |
||
1630 |
/* |
|
1631 |
* We do not support swap devices acting as dump devices. |
|
1632 |
*/ |
|
1633 |
if (zvol_is_swap(zv)) |
|
1634 |
return (ENOTSUP); |
|
1635 |
||
1636 |
if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, |
|
1637 |
8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { |
|
1638 |
boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; |
|
1639 |
||
1640 |
if ((error = zvol_dump_init(zv, resize)) != 0) { |
|
1641 |
(void) zvol_dump_fini(zv); |
|
1642 |
return (error); |
|
1643 |
} |
|
1644 |
} |
|
1645 |
||
1646 |
/* |
|
1647 |
* Build up our lba mapping. |
|
1648 |
*/ |
|
1649 |
error = zvol_get_lbas(zv); |
|
1650 |
if (error) { |
|
1651 |
(void) zvol_dump_fini(zv); |
|
1652 |
return (error); |
|
1653 |
} |
|
1654 |
||
1655 |
tx = dmu_tx_create(os); |
|
1656 |
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); |
|
1657 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
1658 |
if (error) { |
|
1659 |
dmu_tx_abort(tx); |
|
1660 |
(void) zvol_dump_fini(zv); |
|
1661 |
return (error); |
|
1662 |
} |
|
1663 |
||
1664 |
zv->zv_flags |= ZVOL_DUMPIFIED; |
|
1665 |
error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, |
|
1666 |
&zv->zv_volsize, tx); |
|
1667 |
dmu_tx_commit(tx); |
|
1668 |
||
1669 |
if (error) { |
|
1670 |
(void) zvol_dump_fini(zv); |
|
1671 |
return (error); |
|
1672 |
} |
|
1673 |
||
1674 |
txg_wait_synced(dmu_objset_pool(os), 0); |
|
1675 |
return (0); |
|
1676 |
} |
|
1677 |
||
1678 |
static int |
|
1679 |
zvol_dump_fini(zvol_state_t *zv) |
|
1680 |
{ |
|
1681 |
dmu_tx_t *tx; |
|
1682 |
objset_t *os = zv->zv_objset; |
|
1683 |
nvlist_t *nv; |
|
1684 |
int error = 0; |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1685 |
uint64_t checksum, compress, refresrv, vbs; |
6423 | 1686 |
|
7080
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1687 |
/* |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1688 |
* Attempt to restore the zvol back to its pre-dumpified state. |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1689 |
* This is a best-effort attempt as it's possible that not all |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1690 |
* of these properties were initialized during the dumpify process |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1691 |
* (i.e. error during zvol_dump_init). |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1692 |
*/ |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1693 |
|
6423 | 1694 |
tx = dmu_tx_create(os); |
1695 |
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); |
|
1696 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
1697 |
if (error) { |
|
1698 |
dmu_tx_abort(tx); |
|
1699 |
return (error); |
|
1700 |
} |
|
7080
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1701 |
(void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1702 |
dmu_tx_commit(tx); |
6423 | 1703 |
|
1704 |
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, |
|
1705 |
zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); |
|
1706 |
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, |
|
1707 |
zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); |
|
1708 |
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, |
|
1709 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1710 |
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1711 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); |
6423 | 1712 |
|
1713 |
VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); |
|
1714 |
(void) nvlist_add_uint64(nv, |
|
1715 |
zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); |
|
1716 |
(void) nvlist_add_uint64(nv, |
|
1717 |
zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); |
|
1718 |
(void) nvlist_add_uint64(nv, |
|
1719 |
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); |
|
7837
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1720 |
(void) nvlist_add_uint64(nv, |
001de5627df3
6333409 traversal code should be able to issue multiple reads in parallel
Matthew Ahrens <Matthew.Ahrens@Sun.COM>
parents:
7754
diff
changeset
|
1721 |
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), vbs); |
6423 | 1722 |
(void) zfs_set_prop_nvlist(zv->zv_name, nv); |
1723 |
nvlist_free(nv); |
|
1724 |
||
7080
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1725 |
zvol_free_extents(zv); |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1726 |
zv->zv_flags &= ~ZVOL_DUMPIFIED; |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1727 |
(void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); |
13a30a37ddc0
6724533 assertion failed: file: ../../common/fs/zfs/dmu_tx.c, line: 983
maybee
parents:
7013
diff
changeset
|
1728 |
|
6423 | 1729 |
return (0); |
1730 |
} |