author | eschrock |
Tue, 05 Sep 2006 11:37:36 -0700 | |
changeset 2676 | 5cee47eddab6 |
parent 2638 | 4f583dfeae92 |
child 2885 | c0259887ebbc |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1298 | 22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/types.h> |
|
29 |
#include <sys/param.h> |
|
30 |
#include <sys/systm.h> |
|
31 |
#include <sys/sysmacros.h> |
|
32 |
#include <sys/kmem.h> |
|
33 |
#include <sys/pathname.h> |
|
34 |
#include <sys/acl.h> |
|
35 |
#include <sys/vnode.h> |
|
36 |
#include <sys/vfs.h> |
|
37 |
#include <sys/mntent.h> |
|
38 |
#include <sys/mount.h> |
|
39 |
#include <sys/cmn_err.h> |
|
40 |
#include "fs/fs_subr.h" |
|
41 |
#include <sys/zfs_znode.h> |
|
42 |
#include <sys/zil.h> |
|
43 |
#include <sys/fs/zfs.h> |
|
44 |
#include <sys/dmu.h> |
|
45 |
#include <sys/dsl_prop.h> |
|
46 |
#include <sys/spa.h> |
|
47 |
#include <sys/zap.h> |
|
48 |
#include <sys/varargs.h> |
|
49 |
#include <sys/policy.h> |
|
50 |
#include <sys/atomic.h> |
|
51 |
#include <sys/mkdev.h> |
|
52 |
#include <sys/modctl.h> |
|
53 |
#include <sys/zfs_ioctl.h> |
|
54 |
#include <sys/zfs_ctldir.h> |
|
1544 | 55 |
#include <sys/bootconf.h> |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
56 |
#include <sys/sunddi.h> |
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
57 |
#include <sys/dnlc.h> |
789 | 58 |
|
59 |
int zfsfstype; |
|
60 |
vfsops_t *zfs_vfsops = NULL; |
|
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
61 |
static major_t zfs_major; |
789 | 62 |
static minor_t zfs_minor; |
63 |
static kmutex_t zfs_dev_mtx; |
|
64 |
||
1544 | 65 |
extern char zfs_bootpath[BO_MAXOBJNAME]; |
66 |
||
789 | 67 |
static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); |
68 |
static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); |
|
1544 | 69 |
static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); |
789 | 70 |
static int zfs_root(vfs_t *vfsp, vnode_t **vpp); |
71 |
static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); |
|
72 |
static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); |
|
73 |
static void zfs_freevfs(vfs_t *vfsp); |
|
74 |
static void zfs_objset_close(zfsvfs_t *zfsvfs); |
|
75 |
||
76 |
static const fs_operation_def_t zfs_vfsops_template[] = { |
|
77 |
VFSNAME_MOUNT, zfs_mount, |
|
1544 | 78 |
VFSNAME_MOUNTROOT, zfs_mountroot, |
789 | 79 |
VFSNAME_UNMOUNT, zfs_umount, |
80 |
VFSNAME_ROOT, zfs_root, |
|
81 |
VFSNAME_STATVFS, zfs_statvfs, |
|
82 |
VFSNAME_SYNC, (fs_generic_func_p) zfs_sync, |
|
83 |
VFSNAME_VGET, zfs_vget, |
|
84 |
VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, |
|
85 |
NULL, NULL |
|
86 |
}; |
|
87 |
||
88 |
static const fs_operation_def_t zfs_vfsops_eio_template[] = { |
|
89 |
VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, |
|
90 |
NULL, NULL |
|
91 |
}; |
|
92 |
||
93 |
/* |
|
94 |
* We need to keep a count of active fs's. |
|
95 |
* This is necessary to prevent our module |
|
96 |
* from being unloaded after a umount -f |
|
97 |
*/ |
|
98 |
static uint32_t zfs_active_fs_count = 0; |
|
99 |
||
100 |
static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; |
|
101 |
static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; |
|
102 |
||
103 |
static mntopt_t mntopts[] = { |
|
104 |
{ MNTOPT_XATTR, NULL, NULL, MO_NODISPLAY|MO_DEFAULT, NULL }, |
|
105 |
{ MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, |
|
106 |
{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } |
|
107 |
}; |
|
108 |
||
109 |
static mntopts_t zfs_mntopts = { |
|
110 |
sizeof (mntopts) / sizeof (mntopt_t), |
|
111 |
mntopts |
|
112 |
}; |
|
113 |
||
114 |
/*ARGSUSED*/ |
|
115 |
int |
|
116 |
zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) |
|
117 |
{ |
|
118 |
/* |
|
119 |
* Data integrity is job one. We don't want a compromised kernel |
|
120 |
* writing to the storage pool, so we never sync during panic. |
|
121 |
*/ |
|
122 |
if (panicstr) |
|
123 |
return (0); |
|
124 |
||
125 |
/* |
|
126 |
* SYNC_ATTR is used by fsflush() to force old filesystems like UFS |
|
127 |
* to sync metadata, which they would otherwise cache indefinitely. |
|
128 |
* Semantically, the only requirement is that the sync be initiated. |
|
129 |
* The DMU syncs out txgs frequently, so there's nothing to do. |
|
130 |
*/ |
|
131 |
if (flag & SYNC_ATTR) |
|
132 |
return (0); |
|
133 |
||
134 |
if (vfsp != NULL) { |
|
135 |
/* |
|
136 |
* Sync a specific filesystem. |
|
137 |
*/ |
|
138 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
139 |
||
140 |
ZFS_ENTER(zfsvfs); |
|
141 |
if (zfsvfs->z_log != NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2474
diff
changeset
|
142 |
zil_commit(zfsvfs->z_log, UINT64_MAX, 0); |
789 | 143 |
else |
144 |
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); |
|
145 |
ZFS_EXIT(zfsvfs); |
|
146 |
} else { |
|
147 |
/* |
|
148 |
* Sync all ZFS filesystems. This is what happens when you |
|
149 |
* run sync(1M). Unlike other filesystems, ZFS honors the |
|
150 |
* request by waiting for all pools to commit all dirty data. |
|
151 |
*/ |
|
152 |
spa_sync_allpools(); |
|
153 |
} |
|
154 |
||
155 |
return (0); |
|
156 |
} |
|
157 |
||
1544 | 158 |
static int |
159 |
zfs_create_unique_device(dev_t *dev) |
|
160 |
{ |
|
161 |
major_t new_major; |
|
162 |
||
163 |
do { |
|
164 |
ASSERT3U(zfs_minor, <=, MAXMIN32); |
|
165 |
minor_t start = zfs_minor; |
|
166 |
do { |
|
167 |
mutex_enter(&zfs_dev_mtx); |
|
168 |
if (zfs_minor >= MAXMIN32) { |
|
169 |
/* |
|
170 |
* If we're still using the real major |
|
171 |
* keep out of /dev/zfs and /dev/zvol minor |
|
172 |
* number space. If we're using a getudev()'ed |
|
173 |
* major number, we can use all of its minors. |
|
174 |
*/ |
|
175 |
if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) |
|
176 |
zfs_minor = ZFS_MIN_MINOR; |
|
177 |
else |
|
178 |
zfs_minor = 0; |
|
179 |
} else { |
|
180 |
zfs_minor++; |
|
181 |
} |
|
182 |
*dev = makedevice(zfs_major, zfs_minor); |
|
183 |
mutex_exit(&zfs_dev_mtx); |
|
184 |
} while (vfs_devismounted(*dev) && zfs_minor != start); |
|
185 |
if (zfs_minor == start) { |
|
186 |
/* |
|
187 |
* We are using all ~262,000 minor numbers for the |
|
188 |
* current major number. Create a new major number. |
|
189 |
*/ |
|
190 |
if ((new_major = getudev()) == (major_t)-1) { |
|
191 |
cmn_err(CE_WARN, |
|
192 |
"zfs_mount: Can't get unique major " |
|
193 |
"device number."); |
|
194 |
return (-1); |
|
195 |
} |
|
196 |
mutex_enter(&zfs_dev_mtx); |
|
197 |
zfs_major = new_major; |
|
198 |
zfs_minor = 0; |
|
199 |
||
200 |
mutex_exit(&zfs_dev_mtx); |
|
201 |
} else { |
|
202 |
break; |
|
203 |
} |
|
204 |
/* CONSTANTCONDITION */ |
|
205 |
} while (1); |
|
206 |
||
207 |
return (0); |
|
208 |
} |
|
209 |
||
789 | 210 |
static void |
211 |
atime_changed_cb(void *arg, uint64_t newval) |
|
212 |
{ |
|
213 |
zfsvfs_t *zfsvfs = arg; |
|
214 |
||
215 |
if (newval == TRUE) { |
|
216 |
zfsvfs->z_atime = TRUE; |
|
217 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); |
|
218 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); |
|
219 |
} else { |
|
220 |
zfsvfs->z_atime = FALSE; |
|
221 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); |
|
222 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); |
|
223 |
} |
|
224 |
} |
|
225 |
||
226 |
static void |
|
227 |
blksz_changed_cb(void *arg, uint64_t newval) |
|
228 |
{ |
|
229 |
zfsvfs_t *zfsvfs = arg; |
|
230 |
||
231 |
if (newval < SPA_MINBLOCKSIZE || |
|
232 |
newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) |
|
233 |
newval = SPA_MAXBLOCKSIZE; |
|
234 |
||
235 |
zfsvfs->z_max_blksz = newval; |
|
236 |
zfsvfs->z_vfs->vfs_bsize = newval; |
|
237 |
} |
|
238 |
||
239 |
static void |
|
240 |
readonly_changed_cb(void *arg, uint64_t newval) |
|
241 |
{ |
|
242 |
zfsvfs_t *zfsvfs = arg; |
|
243 |
||
244 |
if (newval) { |
|
245 |
/* XXX locking on vfs_flag? */ |
|
246 |
zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; |
|
247 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); |
|
248 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); |
|
249 |
(void) zfs_delete_thread_target(zfsvfs, 0); |
|
250 |
} else { |
|
251 |
/* XXX locking on vfs_flag? */ |
|
252 |
zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; |
|
253 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); |
|
254 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); |
|
255 |
(void) zfs_delete_thread_target(zfsvfs, 1); |
|
256 |
} |
|
257 |
} |
|
258 |
||
259 |
static void |
|
260 |
devices_changed_cb(void *arg, uint64_t newval) |
|
261 |
{ |
|
262 |
zfsvfs_t *zfsvfs = arg; |
|
263 |
||
264 |
if (newval == FALSE) { |
|
265 |
zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; |
|
266 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); |
|
267 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); |
|
268 |
} else { |
|
269 |
zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; |
|
270 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); |
|
271 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); |
|
272 |
} |
|
273 |
} |
|
274 |
||
275 |
static void |
|
276 |
setuid_changed_cb(void *arg, uint64_t newval) |
|
277 |
{ |
|
278 |
zfsvfs_t *zfsvfs = arg; |
|
279 |
||
280 |
if (newval == FALSE) { |
|
281 |
zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; |
|
282 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); |
|
283 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); |
|
284 |
} else { |
|
285 |
zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; |
|
286 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); |
|
287 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); |
|
288 |
} |
|
289 |
} |
|
290 |
||
291 |
static void |
|
292 |
exec_changed_cb(void *arg, uint64_t newval) |
|
293 |
{ |
|
294 |
zfsvfs_t *zfsvfs = arg; |
|
295 |
||
296 |
if (newval == FALSE) { |
|
297 |
zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; |
|
298 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); |
|
299 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); |
|
300 |
} else { |
|
301 |
zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; |
|
302 |
vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); |
|
303 |
vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); |
|
304 |
} |
|
305 |
} |
|
306 |
||
307 |
static void |
|
308 |
snapdir_changed_cb(void *arg, uint64_t newval) |
|
309 |
{ |
|
310 |
zfsvfs_t *zfsvfs = arg; |
|
311 |
||
312 |
zfsvfs->z_show_ctldir = newval; |
|
313 |
} |
|
314 |
||
315 |
static void |
|
316 |
acl_mode_changed_cb(void *arg, uint64_t newval) |
|
317 |
{ |
|
318 |
zfsvfs_t *zfsvfs = arg; |
|
319 |
||
320 |
zfsvfs->z_acl_mode = newval; |
|
321 |
} |
|
322 |
||
323 |
static void |
|
324 |
acl_inherit_changed_cb(void *arg, uint64_t newval) |
|
325 |
{ |
|
326 |
zfsvfs_t *zfsvfs = arg; |
|
327 |
||
328 |
zfsvfs->z_acl_inherit = newval; |
|
329 |
} |
|
330 |
||
1544 | 331 |
static int |
332 |
zfs_refresh_properties(vfs_t *vfsp) |
|
333 |
{ |
|
334 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
335 |
||
2354
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
336 |
/* |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
337 |
* Remount operations default to "rw" unless "ro" is explicitly |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
338 |
* specified. |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
339 |
*/ |
1544 | 340 |
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { |
341 |
readonly_changed_cb(zfsvfs, B_TRUE); |
|
2354
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
342 |
} else { |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
343 |
if (!dmu_objset_is_snapshot(zfsvfs->z_os)) |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
344 |
readonly_changed_cb(zfsvfs, B_FALSE); |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
345 |
else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
346 |
return (EROFS); |
1544 | 347 |
} |
348 |
||
349 |
if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { |
|
350 |
devices_changed_cb(zfsvfs, B_FALSE); |
|
351 |
setuid_changed_cb(zfsvfs, B_FALSE); |
|
352 |
} else { |
|
353 |
if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) |
|
354 |
devices_changed_cb(zfsvfs, B_FALSE); |
|
355 |
else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) |
|
356 |
devices_changed_cb(zfsvfs, B_TRUE); |
|
357 |
||
358 |
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) |
|
359 |
setuid_changed_cb(zfsvfs, B_FALSE); |
|
360 |
else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) |
|
361 |
setuid_changed_cb(zfsvfs, B_TRUE); |
|
362 |
} |
|
363 |
||
364 |
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) |
|
365 |
exec_changed_cb(zfsvfs, B_FALSE); |
|
366 |
else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) |
|
367 |
exec_changed_cb(zfsvfs, B_TRUE); |
|
368 |
||
2474
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
369 |
if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
370 |
atime_changed_cb(zfsvfs, B_TRUE); |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
371 |
else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
372 |
atime_changed_cb(zfsvfs, B_FALSE); |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
373 |
|
1544 | 374 |
return (0); |
375 |
} |
|
376 |
||
377 |
static int |
|
378 |
zfs_register_callbacks(vfs_t *vfsp) |
|
379 |
{ |
|
380 |
struct dsl_dataset *ds = NULL; |
|
381 |
objset_t *os = NULL; |
|
382 |
zfsvfs_t *zfsvfs = NULL; |
|
383 |
int do_readonly = FALSE, readonly; |
|
384 |
int do_setuid = FALSE, setuid; |
|
385 |
int do_exec = FALSE, exec; |
|
386 |
int do_devices = FALSE, devices; |
|
387 |
int error = 0; |
|
388 |
||
389 |
ASSERT(vfsp); |
|
390 |
zfsvfs = vfsp->vfs_data; |
|
391 |
ASSERT(zfsvfs); |
|
392 |
os = zfsvfs->z_os; |
|
393 |
||
394 |
/* |
|
395 |
* The act of registering our callbacks will destroy any mount |
|
396 |
* options we may have. In order to enable temporary overrides |
|
397 |
* of mount options, we stash away the current values and restore |
|
398 |
* restore them after we register the callbacks. |
|
399 |
*/ |
|
400 |
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { |
|
401 |
readonly = B_TRUE; |
|
402 |
do_readonly = B_TRUE; |
|
403 |
} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { |
|
404 |
readonly = B_FALSE; |
|
405 |
do_readonly = B_TRUE; |
|
406 |
} |
|
407 |
if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { |
|
408 |
devices = B_FALSE; |
|
409 |
setuid = B_FALSE; |
|
410 |
do_devices = B_TRUE; |
|
411 |
do_setuid = B_TRUE; |
|
412 |
} else { |
|
413 |
if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { |
|
414 |
devices = B_FALSE; |
|
415 |
do_devices = B_TRUE; |
|
416 |
} else if (vfs_optionisset(vfsp, |
|
417 |
MNTOPT_DEVICES, NULL)) { |
|
418 |
devices = B_TRUE; |
|
419 |
do_devices = B_TRUE; |
|
420 |
} |
|
421 |
||
422 |
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { |
|
423 |
setuid = B_FALSE; |
|
424 |
do_setuid = B_TRUE; |
|
425 |
} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { |
|
426 |
setuid = B_TRUE; |
|
427 |
do_setuid = B_TRUE; |
|
428 |
} |
|
429 |
} |
|
430 |
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { |
|
431 |
exec = B_FALSE; |
|
432 |
do_exec = B_TRUE; |
|
433 |
} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { |
|
434 |
exec = B_TRUE; |
|
435 |
do_exec = B_TRUE; |
|
436 |
} |
|
437 |
||
438 |
/* |
|
439 |
* Register property callbacks. |
|
440 |
* |
|
441 |
* It would probably be fine to just check for i/o error from |
|
442 |
* the first prop_register(), but I guess I like to go |
|
443 |
* overboard... |
|
444 |
*/ |
|
445 |
ds = dmu_objset_ds(os); |
|
446 |
error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); |
|
447 |
error = error ? error : dsl_prop_register(ds, |
|
448 |
"recordsize", blksz_changed_cb, zfsvfs); |
|
449 |
error = error ? error : dsl_prop_register(ds, |
|
450 |
"readonly", readonly_changed_cb, zfsvfs); |
|
451 |
error = error ? error : dsl_prop_register(ds, |
|
452 |
"devices", devices_changed_cb, zfsvfs); |
|
453 |
error = error ? error : dsl_prop_register(ds, |
|
454 |
"setuid", setuid_changed_cb, zfsvfs); |
|
455 |
error = error ? error : dsl_prop_register(ds, |
|
456 |
"exec", exec_changed_cb, zfsvfs); |
|
457 |
error = error ? error : dsl_prop_register(ds, |
|
458 |
"snapdir", snapdir_changed_cb, zfsvfs); |
|
459 |
error = error ? error : dsl_prop_register(ds, |
|
460 |
"aclmode", acl_mode_changed_cb, zfsvfs); |
|
461 |
error = error ? error : dsl_prop_register(ds, |
|
462 |
"aclinherit", acl_inherit_changed_cb, zfsvfs); |
|
463 |
if (error) |
|
464 |
goto unregister; |
|
465 |
||
466 |
/* |
|
467 |
* Invoke our callbacks to restore temporary mount options. |
|
468 |
*/ |
|
469 |
if (do_readonly) |
|
470 |
readonly_changed_cb(zfsvfs, readonly); |
|
471 |
if (do_setuid) |
|
472 |
setuid_changed_cb(zfsvfs, setuid); |
|
473 |
if (do_exec) |
|
474 |
exec_changed_cb(zfsvfs, exec); |
|
475 |
if (do_devices) |
|
476 |
devices_changed_cb(zfsvfs, devices); |
|
477 |
||
478 |
return (0); |
|
479 |
||
480 |
unregister: |
|
481 |
/* |
|
482 |
* We may attempt to unregister some callbacks that are not |
|
483 |
* registered, but this is OK; it will simply return ENOMSG, |
|
484 |
* which we will ignore. |
|
485 |
*/ |
|
486 |
(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); |
|
487 |
(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); |
|
488 |
(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); |
|
489 |
(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); |
|
490 |
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); |
|
491 |
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); |
|
492 |
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); |
|
493 |
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); |
|
494 |
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, |
|
495 |
zfsvfs); |
|
496 |
return (error); |
|
497 |
||
498 |
} |
|
499 |
||
500 |
static int |
|
501 |
zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) |
|
502 |
{ |
|
503 |
dev_t mount_dev; |
|
504 |
uint64_t recordsize, readonly; |
|
505 |
int error = 0; |
|
506 |
int mode; |
|
507 |
zfsvfs_t *zfsvfs; |
|
508 |
znode_t *zp = NULL; |
|
509 |
||
510 |
ASSERT(vfsp); |
|
511 |
ASSERT(osname); |
|
512 |
||
513 |
/* |
|
514 |
* Initialize the zfs-specific filesystem structure. |
|
515 |
* Should probably make this a kmem cache, shuffle fields, |
|
516 |
* and just bzero up to z_hold_mtx[]. |
|
517 |
*/ |
|
518 |
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); |
|
519 |
zfsvfs->z_vfs = vfsp; |
|
520 |
zfsvfs->z_parent = zfsvfs; |
|
521 |
zfsvfs->z_assign = TXG_NOWAIT; |
|
522 |
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; |
|
523 |
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; |
|
524 |
||
525 |
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); |
|
526 |
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), |
|
527 |
offsetof(znode_t, z_link_node)); |
|
528 |
rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); |
|
529 |
||
530 |
/* Initialize the generic filesystem structure. */ |
|
531 |
vfsp->vfs_bcount = 0; |
|
532 |
vfsp->vfs_data = NULL; |
|
533 |
||
534 |
if (zfs_create_unique_device(&mount_dev) == -1) { |
|
535 |
error = ENODEV; |
|
536 |
goto out; |
|
537 |
} |
|
538 |
ASSERT(vfs_devismounted(mount_dev) == 0); |
|
539 |
||
540 |
if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, |
|
541 |
NULL)) |
|
542 |
goto out; |
|
543 |
||
544 |
vfsp->vfs_dev = mount_dev; |
|
545 |
vfsp->vfs_fstype = zfsfstype; |
|
546 |
vfsp->vfs_bsize = recordsize; |
|
547 |
vfsp->vfs_flag |= VFS_NOTRUNC; |
|
548 |
vfsp->vfs_data = zfsvfs; |
|
549 |
||
550 |
if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) |
|
551 |
goto out; |
|
552 |
||
553 |
if (readonly) |
|
554 |
mode = DS_MODE_PRIMARY | DS_MODE_READONLY; |
|
555 |
else |
|
556 |
mode = DS_MODE_PRIMARY; |
|
557 |
||
558 |
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); |
|
559 |
if (error == EROFS) { |
|
560 |
mode = DS_MODE_PRIMARY | DS_MODE_READONLY; |
|
561 |
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, |
|
562 |
&zfsvfs->z_os); |
|
563 |
} |
|
564 |
||
565 |
if (error) |
|
566 |
goto out; |
|
567 |
||
568 |
if (error = zfs_init_fs(zfsvfs, &zp, cr)) |
|
569 |
goto out; |
|
570 |
||
571 |
/* The call to zfs_init_fs leaves the vnode held, release it here. */ |
|
572 |
VN_RELE(ZTOV(zp)); |
|
573 |
||
574 |
if (dmu_objset_is_snapshot(zfsvfs->z_os)) { |
|
575 |
ASSERT(mode & DS_MODE_READONLY); |
|
576 |
atime_changed_cb(zfsvfs, B_FALSE); |
|
577 |
readonly_changed_cb(zfsvfs, B_TRUE); |
|
578 |
zfsvfs->z_issnap = B_TRUE; |
|
579 |
} else { |
|
580 |
error = zfs_register_callbacks(vfsp); |
|
581 |
if (error) |
|
582 |
goto out; |
|
583 |
||
584 |
/* |
|
585 |
* Start a delete thread running. |
|
586 |
*/ |
|
587 |
(void) zfs_delete_thread_target(zfsvfs, 1); |
|
588 |
||
589 |
/* |
|
590 |
* Parse and replay the intent log. |
|
591 |
*/ |
|
592 |
zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, |
|
593 |
zfs_replay_vector, (void (*)(void *))zfs_delete_wait_empty); |
|
594 |
||
595 |
if (!zil_disable) |
|
596 |
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); |
|
597 |
} |
|
598 |
||
599 |
if (!zfsvfs->z_issnap) |
|
600 |
zfsctl_create(zfsvfs); |
|
601 |
out: |
|
602 |
if (error) { |
|
603 |
if (zfsvfs->z_os) |
|
604 |
dmu_objset_close(zfsvfs->z_os); |
|
605 |
kmem_free(zfsvfs, sizeof (zfsvfs_t)); |
|
606 |
} else { |
|
607 |
atomic_add_32(&zfs_active_fs_count, 1); |
|
608 |
} |
|
609 |
||
610 |
return (error); |
|
611 |
||
612 |
} |
|
613 |
||
614 |
void |
|
615 |
zfs_unregister_callbacks(zfsvfs_t *zfsvfs) |
|
616 |
{ |
|
617 |
objset_t *os = zfsvfs->z_os; |
|
618 |
struct dsl_dataset *ds; |
|
619 |
||
620 |
/* |
|
621 |
* Unregister properties. |
|
622 |
*/ |
|
623 |
if (!dmu_objset_is_snapshot(os)) { |
|
624 |
ds = dmu_objset_ds(os); |
|
625 |
VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, |
|
626 |
zfsvfs) == 0); |
|
627 |
||
628 |
VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, |
|
629 |
zfsvfs) == 0); |
|
630 |
||
631 |
VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, |
|
632 |
zfsvfs) == 0); |
|
633 |
||
634 |
VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, |
|
635 |
zfsvfs) == 0); |
|
636 |
||
637 |
VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, |
|
638 |
zfsvfs) == 0); |
|
639 |
||
640 |
VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, |
|
641 |
zfsvfs) == 0); |
|
642 |
||
643 |
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, |
|
644 |
zfsvfs) == 0); |
|
645 |
||
646 |
VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, |
|
647 |
zfsvfs) == 0); |
|
648 |
||
649 |
VERIFY(dsl_prop_unregister(ds, "aclinherit", |
|
650 |
acl_inherit_changed_cb, zfsvfs) == 0); |
|
651 |
} |
|
652 |
} |
|
653 |
||
654 |
static int |
|
655 |
zfs_mountroot(vfs_t *vfsp, enum whymountroot why) |
|
656 |
{ |
|
657 |
int error = 0; |
|
658 |
int ret = 0; |
|
659 |
static int zfsrootdone = 0; |
|
660 |
zfsvfs_t *zfsvfs = NULL; |
|
661 |
znode_t *zp = NULL; |
|
662 |
vnode_t *vp = NULL; |
|
663 |
||
664 |
ASSERT(vfsp); |
|
665 |
||
666 |
/* |
|
667 |
* The filesystem that we mount as root is defined in |
|
668 |
* /etc/system using the zfsroot variable. The value defined |
|
669 |
* there is copied early in startup code to zfs_bootpath |
|
670 |
* (defined in modsysfile.c). |
|
671 |
*/ |
|
672 |
if (why == ROOT_INIT) { |
|
673 |
if (zfsrootdone++) |
|
674 |
return (EBUSY); |
|
675 |
||
676 |
/* |
|
677 |
* This needs to be done here, so that when we return from |
|
678 |
* mountroot, the vfs resource name will be set correctly. |
|
679 |
*/ |
|
680 |
if (snprintf(rootfs.bo_name, BO_MAXOBJNAME, "%s", zfs_bootpath) |
|
681 |
>= BO_MAXOBJNAME) |
|
682 |
return (ENAMETOOLONG); |
|
683 |
||
684 |
if (error = vfs_lock(vfsp)) |
|
685 |
return (error); |
|
686 |
||
687 |
if (error = zfs_domount(vfsp, zfs_bootpath, CRED())) |
|
688 |
goto out; |
|
689 |
||
690 |
zfsvfs = (zfsvfs_t *)vfsp->vfs_data; |
|
691 |
ASSERT(zfsvfs); |
|
692 |
if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) |
|
693 |
goto out; |
|
694 |
||
695 |
vp = ZTOV(zp); |
|
696 |
mutex_enter(&vp->v_lock); |
|
697 |
vp->v_flag |= VROOT; |
|
698 |
mutex_exit(&vp->v_lock); |
|
699 |
rootvp = vp; |
|
700 |
||
701 |
/* |
|
702 |
* The zfs_zget call above returns with a hold on vp, we release |
|
703 |
* it here. |
|
704 |
*/ |
|
705 |
VN_RELE(vp); |
|
706 |
||
707 |
/* |
|
708 |
* Mount root as readonly initially, it will be remouted |
|
709 |
* read/write by /lib/svc/method/fs-usr. |
|
710 |
*/ |
|
711 |
readonly_changed_cb(vfsp->vfs_data, B_TRUE); |
|
712 |
vfs_add((struct vnode *)0, vfsp, |
|
713 |
(vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); |
|
714 |
out: |
|
715 |
vfs_unlock(vfsp); |
|
716 |
ret = (error) ? error : 0; |
|
717 |
return (ret); |
|
718 |
||
719 |
} else if (why == ROOT_REMOUNT) { |
|
720 |
||
721 |
readonly_changed_cb(vfsp->vfs_data, B_FALSE); |
|
722 |
vfsp->vfs_flag |= VFS_REMOUNT; |
|
723 |
return (zfs_refresh_properties(vfsp)); |
|
724 |
||
725 |
} else if (why == ROOT_UNMOUNT) { |
|
726 |
zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); |
|
727 |
(void) zfs_sync(vfsp, 0, 0); |
|
728 |
return (0); |
|
729 |
} |
|
730 |
||
731 |
/* |
|
732 |
* if "why" is equal to anything else other than ROOT_INIT, |
|
733 |
* ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. |
|
734 |
*/ |
|
735 |
return (ENOTSUP); |
|
736 |
} |
|
737 |
||
789 | 738 |
/*ARGSUSED*/ |
739 |
static int |
|
740 |
zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) |
|
741 |
{ |
|
742 |
char *osname; |
|
743 |
pathname_t spn; |
|
744 |
int error = 0; |
|
745 |
uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? |
|
746 |
UIO_SYSSPACE : UIO_USERSPACE; |
|
747 |
int canwrite; |
|
748 |
||
749 |
if (mvp->v_type != VDIR) |
|
750 |
return (ENOTDIR); |
|
751 |
||
752 |
mutex_enter(&mvp->v_lock); |
|
753 |
if ((uap->flags & MS_REMOUNT) == 0 && |
|
754 |
(uap->flags & MS_OVERLAY) == 0 && |
|
755 |
(mvp->v_count != 1 || (mvp->v_flag & VROOT))) { |
|
756 |
mutex_exit(&mvp->v_lock); |
|
757 |
return (EBUSY); |
|
758 |
} |
|
759 |
mutex_exit(&mvp->v_lock); |
|
760 |
||
761 |
/* |
|
762 |
* ZFS does not support passing unparsed data in via MS_DATA. |
|
763 |
* Users should use the MS_OPTIONSTR interface; this means |
|
764 |
* that all option parsing is already done and the options struct |
|
765 |
* can be interrogated. |
|
766 |
*/ |
|
767 |
if ((uap->flags & MS_DATA) && uap->datalen > 0) |
|
768 |
return (EINVAL); |
|
769 |
||
770 |
/* |
|
771 |
* When doing a remount, we simply refresh our temporary properties |
|
772 |
* according to those options set in the current VFS options. |
|
773 |
*/ |
|
774 |
if (uap->flags & MS_REMOUNT) { |
|
1544 | 775 |
return (zfs_refresh_properties(vfsp)); |
789 | 776 |
} |
777 |
||
778 |
/* |
|
779 |
* Get the objset name (the "special" mount argument). |
|
780 |
*/ |
|
781 |
if (error = pn_get(uap->spec, fromspace, &spn)) |
|
782 |
return (error); |
|
783 |
||
784 |
osname = spn.pn_path; |
|
785 |
||
786 |
if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) |
|
787 |
goto out; |
|
788 |
||
789 |
/* |
|
790 |
* Refuse to mount a filesystem if we are in a local zone and the |
|
791 |
* dataset is not visible. |
|
792 |
*/ |
|
793 |
if (!INGLOBALZONE(curproc) && |
|
794 |
(!zone_dataset_visible(osname, &canwrite) || !canwrite)) { |
|
795 |
error = EPERM; |
|
796 |
goto out; |
|
797 |
} |
|
798 |
||
1544 | 799 |
error = zfs_domount(vfsp, osname, cr); |
789 | 800 |
|
801 |
out: |
|
802 |
pn_free(&spn); |
|
803 |
return (error); |
|
804 |
} |
|
805 |
||
806 |
static int |
|
807 |
zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) |
|
808 |
{ |
|
809 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
810 |
dmu_objset_stats_t dstats; |
|
811 |
dev32_t d32; |
|
812 |
||
813 |
ZFS_ENTER(zfsvfs); |
|
814 |
||
815 |
dmu_objset_stats(zfsvfs->z_os, &dstats); |
|
816 |
||
817 |
/* |
|
818 |
* The underlying storage pool actually uses multiple block sizes. |
|
819 |
* We report the fragsize as the smallest block size we support, |
|
820 |
* and we report our blocksize as the filesystem's maximum blocksize. |
|
821 |
*/ |
|
822 |
statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; |
|
823 |
statp->f_bsize = zfsvfs->z_max_blksz; |
|
824 |
||
825 |
/* |
|
826 |
* The following report "total" blocks of various kinds in the |
|
827 |
* file system, but reported in terms of f_frsize - the |
|
828 |
* "fragment" size. |
|
829 |
*/ |
|
830 |
||
831 |
statp->f_blocks = |
|
832 |
(dstats.dds_space_refd + dstats.dds_available) >> SPA_MINBLOCKSHIFT; |
|
833 |
statp->f_bfree = dstats.dds_available >> SPA_MINBLOCKSHIFT; |
|
834 |
statp->f_bavail = statp->f_bfree; /* no root reservation */ |
|
835 |
||
836 |
/* |
|
837 |
* statvfs() should really be called statufs(), because it assumes |
|
838 |
* static metadata. ZFS doesn't preallocate files, so the best |
|
839 |
* we can do is report the max that could possibly fit in f_files, |
|
840 |
* and that minus the number actually used in f_ffree. |
|
841 |
* For f_ffree, report the smaller of the number of object available |
|
842 |
* and the number of blocks (each object will take at least a block). |
|
843 |
*/ |
|
844 |
statp->f_ffree = MIN(dstats.dds_objects_avail, statp->f_bfree); |
|
845 |
statp->f_favail = statp->f_ffree; /* no "root reservation" */ |
|
846 |
statp->f_files = statp->f_ffree + dstats.dds_objects_used; |
|
847 |
||
848 |
(void) cmpldev(&d32, vfsp->vfs_dev); |
|
849 |
statp->f_fsid = d32; |
|
850 |
||
851 |
/* |
|
852 |
* We're a zfs filesystem. |
|
853 |
*/ |
|
854 |
(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); |
|
855 |
||
1123
02a0390fbc7d
6363529 UNIX03/UNIX98 *vsx* CAPI.os/files/fstatvfs/T.fstatvfs 11 FAILS
marks
parents:
849
diff
changeset
|
856 |
statp->f_flag = vf_to_stf(vfsp->vfs_flag); |
789 | 857 |
|
858 |
statp->f_namemax = ZFS_MAXNAMELEN; |
|
859 |
||
860 |
/* |
|
861 |
* We have all of 32 characters to stuff a string here. |
|
862 |
* Is there anything useful we could/should provide? |
|
863 |
*/ |
|
864 |
bzero(statp->f_fstr, sizeof (statp->f_fstr)); |
|
865 |
||
866 |
ZFS_EXIT(zfsvfs); |
|
867 |
return (0); |
|
868 |
} |
|
869 |
||
870 |
static int |
|
871 |
zfs_root(vfs_t *vfsp, vnode_t **vpp) |
|
872 |
{ |
|
873 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
874 |
znode_t *rootzp; |
|
875 |
int error; |
|
876 |
||
877 |
ZFS_ENTER(zfsvfs); |
|
878 |
||
879 |
error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); |
|
880 |
if (error == 0) |
|
881 |
*vpp = ZTOV(rootzp); |
|
882 |
||
883 |
ZFS_EXIT(zfsvfs); |
|
884 |
return (error); |
|
885 |
} |
|
886 |
||
887 |
/*ARGSUSED*/ |
|
888 |
static int |
|
889 |
zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) |
|
890 |
{ |
|
891 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
892 |
int ret; |
|
893 |
||
894 |
if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) |
|
895 |
return (ret); |
|
896 |
||
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
897 |
|
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
898 |
(void) dnlc_purge_vfsp(vfsp, 0); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
899 |
|
789 | 900 |
/* |
901 |
* Unmount any snapshots mounted under .zfs before unmounting the |
|
902 |
* dataset itself. |
|
903 |
*/ |
|
904 |
if (zfsvfs->z_ctldir != NULL && |
|
905 |
(ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) |
|
906 |
return (ret); |
|
907 |
||
908 |
if (fflag & MS_FORCE) { |
|
909 |
vfsp->vfs_flag |= VFS_UNMOUNTED; |
|
910 |
zfsvfs->z_unmounted1 = B_TRUE; |
|
911 |
||
912 |
/* |
|
913 |
* Wait for all zfs threads to leave zfs. |
|
914 |
* Grabbing a rwlock as reader in all vops and |
|
915 |
* as writer here doesn't work because it too easy to get |
|
916 |
* multiple reader enters as zfs can re-enter itself. |
|
917 |
* This can lead to deadlock if there is an intervening |
|
918 |
* rw_enter as writer. |
|
919 |
* So a file system threads ref count (z_op_cnt) is used. |
|
920 |
* A polling loop on z_op_cnt may seem inefficient, but |
|
921 |
* - this saves all threads on exit from having to grab a |
|
922 |
* mutex in order to cv_signal |
|
923 |
* - only occurs on forced unmount in the rare case when |
|
924 |
* there are outstanding threads within the file system. |
|
925 |
*/ |
|
926 |
while (zfsvfs->z_op_cnt) { |
|
927 |
delay(1); |
|
928 |
} |
|
929 |
||
930 |
zfs_objset_close(zfsvfs); |
|
931 |
||
932 |
return (0); |
|
933 |
} |
|
934 |
/* |
|
935 |
* Stop all delete threads. |
|
936 |
*/ |
|
937 |
(void) zfs_delete_thread_target(zfsvfs, 0); |
|
938 |
||
939 |
/* |
|
940 |
* Check the number of active vnodes in the file system. |
|
941 |
* Our count is maintained in the vfs structure, but the number |
|
942 |
* is off by 1 to indicate a hold on the vfs structure itself. |
|
943 |
* |
|
944 |
* The '.zfs' directory maintains a reference of its own, and any active |
|
945 |
* references underneath are reflected in the vnode count. |
|
946 |
*/ |
|
947 |
if (zfsvfs->z_ctldir == NULL) { |
|
948 |
if (vfsp->vfs_count > 1) { |
|
949 |
if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) |
|
950 |
(void) zfs_delete_thread_target(zfsvfs, 1); |
|
951 |
return (EBUSY); |
|
952 |
} |
|
953 |
} else { |
|
954 |
if (vfsp->vfs_count > 2 || |
|
955 |
(zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { |
|
956 |
if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) |
|
957 |
(void) zfs_delete_thread_target(zfsvfs, 1); |
|
958 |
return (EBUSY); |
|
959 |
} |
|
960 |
} |
|
961 |
||
962 |
vfsp->vfs_flag |= VFS_UNMOUNTED; |
|
963 |
zfs_objset_close(zfsvfs); |
|
964 |
||
965 |
return (0); |
|
966 |
} |
|
967 |
||
968 |
static int |
|
969 |
zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) |
|
970 |
{ |
|
971 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
972 |
znode_t *zp; |
|
973 |
uint64_t object = 0; |
|
974 |
uint64_t fid_gen = 0; |
|
975 |
uint64_t gen_mask; |
|
976 |
uint64_t zp_gen; |
|
977 |
int i, err; |
|
978 |
||
979 |
*vpp = NULL; |
|
980 |
||
981 |
ZFS_ENTER(zfsvfs); |
|
982 |
||
983 |
if (fidp->fid_len == LONG_FID_LEN) { |
|
984 |
zfid_long_t *zlfid = (zfid_long_t *)fidp; |
|
985 |
uint64_t objsetid = 0; |
|
986 |
uint64_t setgen = 0; |
|
987 |
||
988 |
for (i = 0; i < sizeof (zlfid->zf_setid); i++) |
|
989 |
objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); |
|
990 |
||
991 |
for (i = 0; i < sizeof (zlfid->zf_setgen); i++) |
|
992 |
setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); |
|
993 |
||
994 |
ZFS_EXIT(zfsvfs); |
|
995 |
||
996 |
err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); |
|
997 |
if (err) |
|
998 |
return (EINVAL); |
|
999 |
ZFS_ENTER(zfsvfs); |
|
1000 |
} |
|
1001 |
||
1002 |
if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { |
|
1003 |
zfid_short_t *zfid = (zfid_short_t *)fidp; |
|
1004 |
||
1005 |
for (i = 0; i < sizeof (zfid->zf_object); i++) |
|
1006 |
object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); |
|
1007 |
||
1008 |
for (i = 0; i < sizeof (zfid->zf_gen); i++) |
|
1009 |
fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); |
|
1010 |
} else { |
|
1011 |
ZFS_EXIT(zfsvfs); |
|
1012 |
return (EINVAL); |
|
1013 |
} |
|
1014 |
||
1015 |
/* A zero fid_gen means we are in the .zfs control directories */ |
|
1016 |
if (fid_gen == 0 && |
|
1017 |
(object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { |
|
1018 |
*vpp = zfsvfs->z_ctldir; |
|
1019 |
ASSERT(*vpp != NULL); |
|
1020 |
if (object == ZFSCTL_INO_SNAPDIR) { |
|
1021 |
VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, |
|
1022 |
0, NULL, NULL) == 0); |
|
1023 |
} else { |
|
1024 |
VN_HOLD(*vpp); |
|
1025 |
} |
|
1026 |
ZFS_EXIT(zfsvfs); |
|
1027 |
return (0); |
|
1028 |
} |
|
1029 |
||
1030 |
gen_mask = -1ULL >> (64 - 8 * i); |
|
1031 |
||
1032 |
dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); |
|
1033 |
if (err = zfs_zget(zfsvfs, object, &zp)) { |
|
1034 |
ZFS_EXIT(zfsvfs); |
|
1035 |
return (err); |
|
1036 |
} |
|
1037 |
zp_gen = zp->z_phys->zp_gen & gen_mask; |
|
1038 |
if (zp_gen == 0) |
|
1039 |
zp_gen = 1; |
|
1040 |
if (zp->z_reap || zp_gen != fid_gen) { |
|
1041 |
dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); |
|
1042 |
VN_RELE(ZTOV(zp)); |
|
1043 |
ZFS_EXIT(zfsvfs); |
|
1044 |
return (EINVAL); |
|
1045 |
} |
|
1046 |
||
1047 |
*vpp = ZTOV(zp); |
|
1048 |
ZFS_EXIT(zfsvfs); |
|
1049 |
return (0); |
|
1050 |
} |
|
1051 |
||
1052 |
static void |
|
1053 |
zfs_objset_close(zfsvfs_t *zfsvfs) |
|
1054 |
{ |
|
1055 |
zfs_delete_t *zd = &zfsvfs->z_delete_head; |
|
1056 |
znode_t *zp, *nextzp; |
|
1057 |
objset_t *os = zfsvfs->z_os; |
|
1058 |
||
1059 |
/* |
|
1060 |
* Stop all delete threads. |
|
1061 |
*/ |
|
1062 |
(void) zfs_delete_thread_target(zfsvfs, 0); |
|
1063 |
||
1064 |
/* |
|
1065 |
* For forced unmount, at this point all vops except zfs_inactive |
|
1066 |
* are erroring EIO. We need to now suspend zfs_inactive threads |
|
1067 |
* while we are freeing dbufs before switching zfs_inactive |
|
1068 |
* to use behaviour without a objset. |
|
1069 |
*/ |
|
1070 |
rw_enter(&zfsvfs->z_um_lock, RW_WRITER); |
|
1071 |
||
1072 |
/* |
|
1073 |
* Release all delete in progress znodes |
|
1074 |
* They will be processed when the file system remounts. |
|
1075 |
*/ |
|
1076 |
mutex_enter(&zd->z_mutex); |
|
1077 |
while (zp = list_head(&zd->z_znodes)) { |
|
1078 |
list_remove(&zd->z_znodes, zp); |
|
1079 |
zp->z_dbuf_held = 0; |
|
1544 | 1080 |
dmu_buf_rele(zp->z_dbuf, NULL); |
789 | 1081 |
} |
1082 |
mutex_exit(&zd->z_mutex); |
|
1083 |
||
1084 |
/* |
|
1085 |
* Release all holds on dbufs |
|
1086 |
* Note, although we have stopped all other vop threads and |
|
1087 |
* zfs_inactive(), the dmu can callback via znode_pageout_func() |
|
1088 |
* which can zfs_znode_free() the znode. |
|
1089 |
* So we lock z_all_znodes; search the list for a held |
|
1090 |
* dbuf; drop the lock (we know zp can't disappear if we hold |
|
1091 |
* a dbuf lock; then regrab the lock and restart. |
|
1092 |
*/ |
|
1093 |
mutex_enter(&zfsvfs->z_znodes_lock); |
|
1094 |
for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { |
|
1095 |
nextzp = list_next(&zfsvfs->z_all_znodes, zp); |
|
1096 |
if (zp->z_dbuf_held) { |
|
1097 |
/* dbufs should only be held when force unmounting */ |
|
1098 |
zp->z_dbuf_held = 0; |
|
1099 |
mutex_exit(&zfsvfs->z_znodes_lock); |
|
1544 | 1100 |
dmu_buf_rele(zp->z_dbuf, NULL); |
789 | 1101 |
/* Start again */ |
1102 |
mutex_enter(&zfsvfs->z_znodes_lock); |
|
1103 |
nextzp = list_head(&zfsvfs->z_all_znodes); |
|
1104 |
} |
|
1105 |
} |
|
1106 |
mutex_exit(&zfsvfs->z_znodes_lock); |
|
1107 |
||
1108 |
/* |
|
1109 |
* Unregister properties. |
|
1110 |
*/ |
|
1544 | 1111 |
if (!dmu_objset_is_snapshot(os)) |
1112 |
zfs_unregister_callbacks(zfsvfs); |
|
789 | 1113 |
|
1114 |
/* |
|
1115 |
* Switch zfs_inactive to behaviour without an objset. |
|
1116 |
* It just tosses cached pages and frees the znode & vnode. |
|
1117 |
* Then re-enable zfs_inactive threads in that new behaviour. |
|
1118 |
*/ |
|
1119 |
zfsvfs->z_unmounted2 = B_TRUE; |
|
1120 |
rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ |
|
1121 |
||
1122 |
/* |
|
1123 |
* Close the zil. Can't close the zil while zfs_inactive |
|
1124 |
* threads are blocked as zil_close can call zfs_inactive. |
|
1125 |
*/ |
|
1126 |
if (zfsvfs->z_log) { |
|
1127 |
zil_close(zfsvfs->z_log); |
|
1128 |
zfsvfs->z_log = NULL; |
|
1129 |
} |
|
1130 |
||
1131 |
/* |
|
1544 | 1132 |
* Evict all dbufs so that cached znodes will be freed |
1133 |
*/ |
|
1646
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1134 |
if (dmu_objset_evict_dbufs(os, 1)) { |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1135 |
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1136 |
(void) dmu_objset_evict_dbufs(os, 0); |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1137 |
} |
1544 | 1138 |
|
1139 |
/* |
|
789 | 1140 |
* Finally close the objset |
1141 |
*/ |
|
1142 |
dmu_objset_close(os); |
|
1143 |
||
1298 | 1144 |
/* |
1145 |
* We can now safely destroy the '.zfs' directory node. |
|
1146 |
*/ |
|
1147 |
if (zfsvfs->z_ctldir != NULL) |
|
1148 |
zfsctl_destroy(zfsvfs); |
|
1149 |
||
789 | 1150 |
} |
1151 |
||
1152 |
static void |
|
1153 |
zfs_freevfs(vfs_t *vfsp) |
|
1154 |
{ |
|
1155 |
zfsvfs_t *zfsvfs = vfsp->vfs_data; |
|
1156 |
||
1157 |
kmem_free(zfsvfs, sizeof (zfsvfs_t)); |
|
1158 |
||
1159 |
atomic_add_32(&zfs_active_fs_count, -1); |
|
1160 |
} |
|
1161 |
||
1162 |
/* |
|
1163 |
* VFS_INIT() initialization. Note that there is no VFS_FINI(), |
|
1164 |
* so we can't safely do any non-idempotent initialization here. |
|
1165 |
* Leave that to zfs_init() and zfs_fini(), which are called |
|
1166 |
* from the module's _init() and _fini() entry points. |
|
1167 |
*/ |
|
1168 |
/*ARGSUSED*/ |
|
1169 |
static int |
|
1170 |
zfs_vfsinit(int fstype, char *name) |
|
1171 |
{ |
|
1172 |
int error; |
|
1173 |
||
1174 |
zfsfstype = fstype; |
|
1175 |
||
1176 |
/* |
|
1177 |
* Setup vfsops and vnodeops tables. |
|
1178 |
*/ |
|
1179 |
error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); |
|
1180 |
if (error != 0) { |
|
1181 |
cmn_err(CE_WARN, "zfs: bad vfs ops template"); |
|
1182 |
} |
|
1183 |
||
1184 |
error = zfs_create_op_tables(); |
|
1185 |
if (error) { |
|
1186 |
zfs_remove_op_tables(); |
|
1187 |
cmn_err(CE_WARN, "zfs: bad vnode ops template"); |
|
1188 |
(void) vfs_freevfsops_by_type(zfsfstype); |
|
1189 |
return (error); |
|
1190 |
} |
|
1191 |
||
1192 |
mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); |
|
1193 |
||
1194 |
/* |
|
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1195 |
* Unique major number for all zfs mounts. |
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1196 |
* If we run out of 32-bit minors, we'll getudev() another major. |
789 | 1197 |
*/ |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1198 |
zfs_major = ddi_name_to_major(ZFS_DRIVER); |
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1199 |
zfs_minor = ZFS_MIN_MINOR; |
789 | 1200 |
|
1201 |
return (0); |
|
1202 |
} |
|
1203 |
||
1204 |
/*
 * Module-load initialization for the ZFS file system layer: first the
 * '.zfs' control directory support, then the znode layer.
 */
void
zfs_init(void)
{
	/* Initialize .zfs directory structures. */
	zfsctl_init();

	/* Initialize znode cache, vnode ops, etc... */
	zfs_znode_init();
}
|
1217 |
||
1218 |
/*
 * Module-unload counterpart of zfs_init(): tears down the '.zfs'
 * control directory support and then the znode layer.
 */
void
zfs_fini(void)
{
	/* Release .zfs directory structures. */
	zfsctl_fini();

	/* Release the znode layer. */
	zfs_znode_fini();
}
|
1224 |
||
1225 |
int |
|
1226 |
zfs_busy(void) |
|
1227 |
{ |
|
1228 |
return (zfs_active_fs_count != 0); |
|
1229 |
} |
|
1230 |
||
1231 |
static vfsdef_t vfw = { |
|
1232 |
VFSDEF_VERSION, |
|
1233 |
MNTTYPE_ZFS, |
|
1234 |
zfs_vfsinit, |
|
1488 | 1235 |
VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, |
789 | 1236 |
&zfs_mntopts |
1237 |
}; |
|
1238 |
||
1239 |
struct modlfs zfs_modlfs = { |
|
2676 | 1240 |
&mod_fsops, "ZFS filesystem version " ZFS_VERSION_STRING, &vfw |
789 | 1241 |
}; |