author | llai1 |
Fri, 25 Aug 2006 17:24:25 -0700 | |
changeset 2621 | 4ea88858d952 |
parent 1925 | 91047fd43318 |
child 3446 | 5903aece022d |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1488 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1488 | 22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
0 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ |
|
27 |
/* All Rights Reserved */ |
|
28 |
||
29 |
/* |
|
30 |
* University Copyright- Copyright (c) 1982, 1986, 1988 |
|
31 |
* The Regents of the University of California |
|
32 |
* All Rights Reserved |
|
33 |
* |
|
34 |
* University Acknowledgment- Portions of this document are derived from |
|
35 |
* software developed by the University of California, Berkeley, and its |
|
36 |
* contributors. |
|
37 |
*/ |
|
38 |
||
39 |
||
40 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
41 |
||
42 |
#include <sys/types.h> |
|
43 |
#include <sys/t_lock.h> |
|
44 |
#include <sys/param.h> |
|
45 |
#include <sys/errno.h> |
|
46 |
#include <sys/user.h> |
|
47 |
#include <sys/fstyp.h> |
|
48 |
#include <sys/kmem.h> |
|
49 |
#include <sys/systm.h> |
|
50 |
#include <sys/proc.h> |
|
51 |
#include <sys/mount.h> |
|
52 |
#include <sys/vfs.h> |
|
53 |
#include <sys/fem.h> |
|
54 |
#include <sys/mntent.h> |
|
55 |
#include <sys/stat.h> |
|
56 |
#include <sys/statvfs.h> |
|
57 |
#include <sys/statfs.h> |
|
58 |
#include <sys/cred.h> |
|
59 |
#include <sys/vnode.h> |
|
60 |
#include <sys/rwstlock.h> |
|
61 |
#include <sys/dnlc.h> |
|
62 |
#include <sys/file.h> |
|
63 |
#include <sys/time.h> |
|
64 |
#include <sys/atomic.h> |
|
65 |
#include <sys/cmn_err.h> |
|
66 |
#include <sys/buf.h> |
|
67 |
#include <sys/swap.h> |
|
68 |
#include <sys/debug.h> |
|
69 |
#include <sys/vnode.h> |
|
70 |
#include <sys/modctl.h> |
|
71 |
#include <sys/ddi.h> |
|
72 |
#include <sys/pathname.h> |
|
73 |
#include <sys/bootconf.h> |
|
74 |
#include <sys/dumphdr.h> |
|
75 |
#include <sys/dc_ki.h> |
|
76 |
#include <sys/poll.h> |
|
77 |
#include <sys/sunddi.h> |
|
78 |
#include <sys/sysmacros.h> |
|
79 |
#include <sys/zone.h> |
|
80 |
#include <sys/policy.h> |
|
81 |
#include <sys/ctfs.h> |
|
82 |
#include <sys/objfs.h> |
|
83 |
#include <sys/console.h> |
|
84 |
#include <sys/reboot.h> |
|
85 |
||
86 |
#include <vm/page.h> |
|
87 |
||
88 |
#include <fs/fs_subr.h> |
|
89 |
||
1520 | 90 |
/* Private interfaces to create vopstats-related data structures */ |
91 |
extern void initialize_vopstats(vopstats_t *); |
|
92 |
extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *); |
|
93 |
extern vsk_anchor_t *get_vskstat_anchor(struct vfs *); |
|
94 |
||
0 | 95 |
static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int); |
96 |
static void vfs_setmntopt_nolock(mntopts_t *, const char *, |
|
97 |
const char *, int, int); |
|
98 |
static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **); |
|
99 |
static void vfs_freemnttab(struct vfs *); |
|
100 |
static void vfs_freeopt(mntopt_t *); |
|
101 |
static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *); |
|
102 |
static void vfs_swapopttbl(mntopts_t *, mntopts_t *); |
|
103 |
static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int); |
|
104 |
static void vfs_createopttbl_extend(mntopts_t *, const char *, |
|
105 |
const mntopts_t *); |
|
106 |
static char **vfs_copycancelopt_extend(char **const, int); |
|
107 |
static void vfs_freecancelopt(char **); |
|
108 |
static char *getrootfs(void); |
|
109 |
static int getmacpath(dev_info_t *, void *); |
|
110 |
||
111 |
struct ipmnt { |
|
112 |
struct ipmnt *mip_next; |
|
113 |
dev_t mip_dev; |
|
114 |
struct vfs *mip_vfsp; |
|
115 |
}; |
|
116 |
||
117 |
static kmutex_t vfs_miplist_mutex; |
|
118 |
static struct ipmnt *vfs_miplist = NULL; |
|
119 |
static struct ipmnt *vfs_miplist_end = NULL; |
|
120 |
||
121 |
/* |
|
122 |
* VFS global data. |
|
123 |
*/ |
|
124 |
vnode_t *rootdir; /* pointer to root inode vnode. */ |
|
125 |
vnode_t *devicesdir; /* pointer to inode of devices root */ |
|
2621 | 126 |
vnode_t *devdir; /* pointer to inode of dev root */ |
0 | 127 |
|
128 |
char *server_rootpath; /* root path for diskless clients */ |
|
129 |
char *server_hostname; /* hostname of diskless server */ |
|
130 |
||
131 |
static struct vfs root; |
|
132 |
static struct vfs devices; |
|
2621 | 133 |
static struct vfs dev; |
0 | 134 |
struct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */ |
135 |
rvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */ |
|
136 |
int vfshsz = 512; /* # of heads/locks in vfs hash arrays */ |
|
137 |
/* must be power of 2! */ |
|
138 |
timespec_t vfs_mnttab_ctime; /* mnttab created time */ |
|
139 |
timespec_t vfs_mnttab_mtime; /* mnttab last modified time */ |
|
140 |
char *vfs_dummyfstype = "\0"; |
|
141 |
struct pollhead vfs_pollhd; /* for mnttab pollers */ |
|
142 |
||
143 |
/* |
|
144 |
* Table for generic options recognized in the VFS layer and acted |
|
145 |
* on at this level before parsing file system specific options. |
|
146 |
* The nosuid option is stronger than any of the devices and setuid |
|
147 |
* options, so those are canceled when nosuid is seen. |
|
148 |
* |
|
149 |
* All options which are added here need to be added to the |
|
150 |
* list of standard options in usr/src/cmd/fs.d/fslib.c as well. |
|
151 |
*/ |
|
152 |
/* |
|
153 |
* VFS Mount options table |
|
154 |
*/ |
|
155 |
static char *ro_cancel[] = { MNTOPT_RW, NULL }; |
|
156 |
static char *rw_cancel[] = { MNTOPT_RO, NULL }; |
|
157 |
static char *suid_cancel[] = { MNTOPT_NOSUID, NULL }; |
|
158 |
static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES, |
|
159 |
MNTOPT_NOSETUID, MNTOPT_SETUID, NULL }; |
|
160 |
static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL }; |
|
161 |
static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL }; |
|
162 |
static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL }; |
|
163 |
static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL }; |
|
164 |
static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL }; |
|
165 |
static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL }; |
|
166 |
static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL }; |
|
167 |
static char *noexec_cancel[] = { MNTOPT_EXEC, NULL }; |
|
168 |
||
169 |
static const mntopt_t mntopts[] = { |
|
170 |
/* |
|
171 |
* option name cancel options default arg flags |
|
172 |
*/ |
|
173 |
{ MNTOPT_REMOUNT, NULL, NULL, |
|
174 |
MO_NODISPLAY, (void *)0 }, |
|
175 |
{ MNTOPT_RO, ro_cancel, NULL, 0, |
|
176 |
(void *)0 }, |
|
177 |
{ MNTOPT_RW, rw_cancel, NULL, 0, |
|
178 |
(void *)0 }, |
|
179 |
{ MNTOPT_SUID, suid_cancel, NULL, 0, |
|
180 |
(void *)0 }, |
|
181 |
{ MNTOPT_NOSUID, nosuid_cancel, NULL, 0, |
|
182 |
(void *)0 }, |
|
183 |
{ MNTOPT_DEVICES, devices_cancel, NULL, 0, |
|
184 |
(void *)0 }, |
|
185 |
{ MNTOPT_NODEVICES, nodevices_cancel, NULL, 0, |
|
186 |
(void *)0 }, |
|
187 |
{ MNTOPT_SETUID, setuid_cancel, NULL, 0, |
|
188 |
(void *)0 }, |
|
189 |
{ MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0, |
|
190 |
(void *)0 }, |
|
191 |
{ MNTOPT_NBMAND, nbmand_cancel, NULL, 0, |
|
192 |
(void *)0 }, |
|
193 |
{ MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0, |
|
194 |
(void *)0 }, |
|
195 |
{ MNTOPT_EXEC, exec_cancel, NULL, 0, |
|
196 |
(void *)0 }, |
|
197 |
{ MNTOPT_NOEXEC, noexec_cancel, NULL, 0, |
|
198 |
(void *)0 }, |
|
199 |
}; |
|
200 |
||
201 |
const mntopts_t vfs_mntopts = { |
|
202 |
sizeof (mntopts) / sizeof (mntopt_t), |
|
203 |
(mntopt_t *)&mntopts[0] |
|
204 |
}; |
|
205 |
||
206 |
/* |
|
207 |
* File system operation dispatch functions. |
|
208 |
*/ |
|
209 |
||
210 |
int |
|
211 |
fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) |
|
212 |
{ |
|
213 |
return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); |
|
214 |
} |
|
215 |
||
216 |
int |
|
217 |
fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr) |
|
218 |
{ |
|
219 |
return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr); |
|
220 |
} |
|
221 |
||
222 |
int |
|
223 |
fsop_root(vfs_t *vfsp, vnode_t **vpp) |
|
224 |
{ |
|
225 |
refstr_t *mntpt; |
|
226 |
int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp); |
|
227 |
/* |
|
228 |
* Make sure this root has a path. With lofs, it is possible to have |
|
229 |
* a NULL mountpoint. |
|
230 |
*/ |
|
254
349581d9fc98
6175313 io provider exposes our reluctance to set vnode paths
eschrock
parents:
0
diff
changeset
|
231 |
if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) { |
0 | 232 |
mntpt = vfs_getmntpoint(vfsp); |
233 |
vn_setpath_str(*vpp, refstr_value(mntpt), |
|
234 |
strlen(refstr_value(mntpt))); |
|
235 |
refstr_rele(mntpt); |
|
236 |
} |
|
237 |
||
238 |
return (ret); |
|
239 |
} |
|
240 |
||
241 |
int |
|
242 |
fsop_statfs(vfs_t *vfsp, statvfs64_t *sp) |
|
243 |
{ |
|
244 |
return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp); |
|
245 |
} |
|
246 |
||
247 |
int |
|
248 |
fsop_sync(vfs_t *vfsp, short flag, cred_t *cr) |
|
249 |
{ |
|
250 |
return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr); |
|
251 |
} |
|
252 |
||
253 |
int |
|
254 |
fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) |
|
255 |
{ |
|
256 |
return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp); |
|
257 |
} |
|
258 |
||
259 |
int |
|
260 |
fsop_mountroot(vfs_t *vfsp, enum whymountroot reason) |
|
261 |
{ |
|
262 |
return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason); |
|
263 |
} |
|
264 |
||
265 |
void |
|
266 |
fsop_freefs(vfs_t *vfsp) |
|
267 |
{ |
|
268 |
(*(vfsp)->vfs_op->vfs_freevfs)(vfsp); |
|
269 |
} |
|
270 |
||
271 |
int |
|
272 |
fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate) |
|
273 |
{ |
|
274 |
return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate)); |
|
275 |
} |
|
276 |
||
277 |
int |
|
278 |
fsop_sync_by_kind(int fstype, short flag, cred_t *cr) |
|
279 |
{ |
|
280 |
ASSERT((fstype >= 0) && (fstype < nfstype)); |
|
281 |
||
282 |
if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype])) |
|
283 |
return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr); |
|
284 |
else |
|
285 |
return (ENOTSUP); |
|
286 |
} |
|
287 |
||
288 |
/* |
|
289 |
* File system initialization. vfs_setfsops() must be called from a file |
|
290 |
* system's init routine. |
|
291 |
*/ |
|
292 |
||
293 |
static int |
|
294 |
fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, |
|
295 |
int *unused_ops) |
|
296 |
{ |
|
297 |
static const fs_operation_trans_def_t vfs_ops_table[] = { |
|
298 |
VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount), |
|
299 |
fs_nosys, fs_nosys, |
|
300 |
||
301 |
VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount), |
|
302 |
fs_nosys, fs_nosys, |
|
303 |
||
304 |
VFSNAME_ROOT, offsetof(vfsops_t, vfs_root), |
|
305 |
fs_nosys, fs_nosys, |
|
306 |
||
307 |
VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs), |
|
308 |
fs_nosys, fs_nosys, |
|
309 |
||
310 |
VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync), |
|
311 |
(fs_generic_func_p) fs_sync, |
|
312 |
(fs_generic_func_p) fs_sync, /* No errors allowed */ |
|
313 |
||
314 |
VFSNAME_VGET, offsetof(vfsops_t, vfs_vget), |
|
315 |
fs_nosys, fs_nosys, |
|
316 |
||
317 |
VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot), |
|
318 |
fs_nosys, fs_nosys, |
|
319 |
||
320 |
VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs), |
|
321 |
(fs_generic_func_p)fs_freevfs, |
|
322 |
(fs_generic_func_p)fs_freevfs, /* Shouldn't fail */ |
|
323 |
||
324 |
VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate), |
|
325 |
(fs_generic_func_p)fs_nosys, |
|
326 |
(fs_generic_func_p)fs_nosys, |
|
327 |
||
328 |
NULL, 0, NULL, NULL |
|
329 |
}; |
|
330 |
||
331 |
return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); |
|
332 |
} |
|
333 |
||
334 |
int |
|
335 |
vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) |
|
336 |
{ |
|
337 |
int error; |
|
338 |
int unused_ops; |
|
339 |
||
340 |
/* Verify that fstype refers to a loaded fs (and not fsid 0). */ |
|
341 |
||
342 |
if ((fstype <= 0) || (fstype >= nfstype)) |
|
343 |
return (EINVAL); |
|
344 |
||
345 |
if (!ALLOCATED_VFSSW(&vfssw[fstype])) |
|
346 |
return (EINVAL); |
|
347 |
||
348 |
/* Set up the operations vector. */ |
|
349 |
||
350 |
error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops); |
|
351 |
||
352 |
if (error != 0) |
|
353 |
return (error); |
|
354 |
||
355 |
vfssw[fstype].vsw_flag |= VSW_INSTALLED; |
|
356 |
||
357 |
if (actual != NULL) |
|
358 |
*actual = &vfssw[fstype].vsw_vfsops; |
|
359 |
||
360 |
#if DEBUG |
|
361 |
if (unused_ops != 0) |
|
362 |
cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied " |
|
363 |
"but not used", vfssw[fstype].vsw_name, unused_ops); |
|
364 |
#endif |
|
365 |
||
366 |
return (0); |
|
367 |
} |
|
368 |
||
369 |
int |
|
370 |
vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual) |
|
371 |
{ |
|
372 |
int error; |
|
373 |
int unused_ops; |
|
374 |
||
375 |
*actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP); |
|
376 |
||
377 |
error = fs_copyfsops(template, *actual, &unused_ops); |
|
378 |
if (error != 0) { |
|
379 |
kmem_free(*actual, sizeof (vfsops_t)); |
|
380 |
*actual = NULL; |
|
381 |
return (error); |
|
382 |
} |
|
383 |
||
384 |
return (0); |
|
385 |
} |
|
386 |
||
387 |
/* |
|
388 |
* Free a vfsops structure created as a result of vfs_makefsops(). |
|
389 |
* NOTE: For a vfsops structure initialized by vfs_setfsops(), use |
|
390 |
* vfs_freevfsops_by_type(). |
|
391 |
*/ |
|
392 |
void |
|
393 |
vfs_freevfsops(vfsops_t *vfsops) |
|
394 |
{ |
|
395 |
kmem_free(vfsops, sizeof (vfsops_t)); |
|
396 |
} |
|
397 |
||
398 |
/* |
|
399 |
* Since the vfsops structure is part of the vfssw table and wasn't |
|
400 |
* really allocated, we're not really freeing anything. We keep |
|
401 |
* the name for consistency with vfs_freevfsops(). We do, however, |
|
402 |
* need to take care of a little bookkeeping. |
|
403 |
* NOTE: For a vfsops structure created by vfs_setfsops(), use |
|
404 |
* vfs_freevfsops_by_type(). |
|
405 |
*/ |
|
406 |
int |
|
407 |
vfs_freevfsops_by_type(int fstype) |
|
408 |
{ |
|
409 |
||
410 |
/* Verify that fstype refers to a loaded fs (and not fsid 0). */ |
|
411 |
if ((fstype <= 0) || (fstype >= nfstype)) |
|
412 |
return (EINVAL); |
|
413 |
||
414 |
WLOCK_VFSSW(); |
|
415 |
if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) { |
|
416 |
WUNLOCK_VFSSW(); |
|
417 |
return (EINVAL); |
|
418 |
} |
|
419 |
||
420 |
vfssw[fstype].vsw_flag &= ~VSW_INSTALLED; |
|
421 |
WUNLOCK_VFSSW(); |
|
422 |
||
423 |
return (0); |
|
424 |
} |
|
425 |
||
426 |
/* Support routines used to reference vfs_op */ |
|
427 |
||
428 |
/* Set the operations vector for a vfs */ |
|
429 |
void |
|
430 |
vfs_setops(vfs_t *vfsp, vfsops_t *vfsops) |
|
431 |
{ |
|
432 |
vfsops_t *op; |
|
433 |
||
434 |
ASSERT(vfsp != NULL); |
|
435 |
ASSERT(vfsops != NULL); |
|
436 |
||
437 |
op = vfsp->vfs_op; |
|
438 |
membar_consumer(); |
|
1925 | 439 |
if ((vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) && |
0 | 440 |
casptr(&vfsp->vfs_op, op, vfsops) == op) { |
441 |
return; |
|
442 |
} |
|
443 |
fsem_setvfsops(vfsp, vfsops); |
|
444 |
} |
|
445 |
||
446 |
/* Retrieve the operations vector for a vfs */ |
|
447 |
vfsops_t * |
|
448 |
vfs_getops(vfs_t *vfsp) |
|
449 |
{ |
|
450 |
vfsops_t *op; |
|
451 |
||
452 |
ASSERT(vfsp != NULL); |
|
453 |
||
454 |
op = vfsp->vfs_op; |
|
455 |
membar_consumer(); |
|
1925 | 456 |
if ((vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) && |
457 |
op == vfsp->vfs_op) { |
|
0 | 458 |
return (op); |
459 |
} else { |
|
460 |
return (fsem_getvfsops(vfsp)); |
|
461 |
} |
|
462 |
} |
|
463 |
||
464 |
/* |
|
465 |
* Returns non-zero (1) if the vfsops matches that of the vfs. |
|
466 |
* Returns zero (0) if not. |
|
467 |
*/ |
|
468 |
int |
|
469 |
vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops) |
|
470 |
{ |
|
471 |
return (vfs_getops(vfsp) == vfsops); |
|
472 |
} |
|
473 |
||
474 |
/* |
|
475 |
* Returns non-zero (1) if the file system has installed a non-default, |
|
476 |
* non-error vfs_sync routine. Returns zero (0) otherwise. |
|
477 |
*/ |
|
478 |
int |
|
479 |
vfs_can_sync(vfs_t *vfsp) |
|
480 |
{ |
|
481 |
/* vfs_sync() routine is not the default/error function */ |
|
482 |
return (vfs_getops(vfsp)->vfs_sync != fs_sync); |
|
483 |
} |
|
484 |
||
485 |
/* |
|
486 |
* Initialize a vfs structure. |
|
487 |
*/ |
|
488 |
void |
|
489 |
vfs_init(vfs_t *vfsp, vfsops_t *op, void *data) |
|
490 |
{ |
|
491 |
vfsp->vfs_count = 0; |
|
492 |
vfsp->vfs_next = vfsp; |
|
493 |
vfsp->vfs_prev = vfsp; |
|
494 |
vfsp->vfs_zone_next = vfsp; |
|
495 |
vfsp->vfs_zone_prev = vfsp; |
|
496 |
vfsp->vfs_flag = 0; |
|
497 |
vfsp->vfs_data = (data); |
|
498 |
vfsp->vfs_resource = NULL; |
|
499 |
vfsp->vfs_mntpt = NULL; |
|
500 |
vfsp->vfs_mntopts.mo_count = 0; |
|
501 |
vfsp->vfs_mntopts.mo_list = NULL; |
|
1925 | 502 |
vfsp->vfs_implp = NULL; |
0 | 503 |
vfsp->vfs_zone = NULL; |
1488 | 504 |
/* |
1925 | 505 |
* Note: Don't initialize any member of the vfs_impl_t structure |
506 |
* here as it could be a problem for unbundled file systems. |
|
1488 | 507 |
*/ |
0 | 508 |
vfs_setops((vfsp), (op)); |
509 |
sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); |
|
510 |
} |
|
511 |
||
1925 | 512 |
/* |
513 |
* Allocate and initialize the vfs implementation private data |
|
514 |
* structure, vfs_impl_t. |
|
515 |
*/ |
|
516 |
void |
|
517 |
vfsimpl_setup(vfs_t *vfsp) |
|
518 |
{ |
|
519 |
vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP); |
|
520 |
/* Note that this are #define'd in vfs.h */ |
|
521 |
vfsp->vfs_femhead = NULL; |
|
522 |
vfsp->vfs_vskap = NULL; |
|
523 |
vfsp->vfs_fstypevsp = NULL; |
|
524 |
} |
|
525 |
||
526 |
/* |
|
527 |
* Release the vfs_impl_t structure, if it exists. Some unbundled |
|
528 |
* filesystems may not use the newer version of vfs and thus |
|
529 |
* would not contain this implementation private data structure. |
|
530 |
*/ |
|
531 |
void |
|
532 |
vfsimpl_teardown(vfs_t *vfsp) |
|
533 |
{ |
|
534 |
vfs_impl_t *vip = vfsp->vfs_implp; |
|
535 |
||
536 |
if (vip == NULL) |
|
537 |
return; |
|
538 |
||
539 |
if (vip->vi_femhead) { |
|
540 |
ASSERT(vip->vi_femhead->femh_list == NULL); |
|
541 |
mutex_destroy(&vip->vi_femhead->femh_lock); |
|
542 |
kmem_free(vip->vi_femhead, sizeof (*(vip->vi_femhead))); |
|
543 |
vip->vi_femhead = NULL; |
|
544 |
} |
|
545 |
||
546 |
kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t)); |
|
547 |
vfsp->vfs_implp = NULL; |
|
548 |
} |
|
0 | 549 |
|
550 |
/* |
|
551 |
* VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs, |
|
552 |
* fstatvfs, and sysfs moved to common/syscall. |
|
553 |
*/ |
|
554 |
||
555 |
/* |
|
556 |
* Update every mounted file system. We call the vfs_sync operation of |
|
557 |
* each file system type, passing it a NULL vfsp to indicate that all |
|
558 |
* mounted file systems of that type should be updated. |
|
559 |
*/ |
|
560 |
void |
|
561 |
vfs_sync(int flag) |
|
562 |
{ |
|
563 |
struct vfssw *vswp; |
|
564 |
RLOCK_VFSSW(); |
|
565 |
for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { |
|
566 |
if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { |
|
567 |
vfs_refvfssw(vswp); |
|
568 |
RUNLOCK_VFSSW(); |
|
569 |
(void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag, |
|
570 |
CRED()); |
|
571 |
vfs_unrefvfssw(vswp); |
|
572 |
RLOCK_VFSSW(); |
|
573 |
} |
|
574 |
} |
|
575 |
RUNLOCK_VFSSW(); |
|
576 |
} |
|
577 |
||
578 |
/*
 * Update every mounted file system by calling vfs_sync() with a flag
 * value of zero.
 */
void
sync(void)
{
	const int no_flags = 0;

	vfs_sync(no_flags);
}
|
583 |
||
584 |
/* |
|
585 |
* External routines. |
|
586 |
*/ |
|
587 |
||
588 |
krwlock_t vfssw_lock; /* lock accesses to vfssw */ |
|
589 |
||
590 |
/* |
|
591 |
* Lock for accessing the vfs linked list. Initialized in vfs_mountroot(), |
|
592 |
* but otherwise should be accessed only via vfs_list_lock() and |
|
593 |
* vfs_list_unlock(). Also used to protect the timestamp for mods to the list. |
|
594 |
*/ |
|
595 |
static krwlock_t vfslist; |
|
596 |
||
597 |
/* |
|
598 |
* Mount devfs on /devices. This is done right after root is mounted |
|
599 |
* to provide device access support for the system |
|
600 |
*/ |
|
601 |
static void |
|
602 |
vfs_mountdevices(void) |
|
603 |
{ |
|
604 |
struct vfssw *vsw; |
|
605 |
struct vnode *mvp; |
|
606 |
struct mounta mounta = { /* fake mounta for devfs_mount() */ |
|
607 |
NULL, |
|
608 |
NULL, |
|
609 |
MS_SYSSPACE, |
|
610 |
NULL, |
|
611 |
NULL, |
|
612 |
0, |
|
613 |
NULL, |
|
614 |
0 |
|
615 |
}; |
|
616 |
||
617 |
/* |
|
618 |
* _init devfs module to fill in the vfssw |
|
619 |
*/ |
|
620 |
if (modload("fs", "devfs") == -1) |
|
621 |
cmn_err(CE_PANIC, "Cannot _init devfs module\n"); |
|
622 |
||
623 |
/* |
|
624 |
* Hold vfs |
|
625 |
*/ |
|
626 |
RLOCK_VFSSW(); |
|
627 |
vsw = vfs_getvfsswbyname("devfs"); |
|
628 |
VFS_INIT(&devices, &vsw->vsw_vfsops, NULL); |
|
629 |
VFS_HOLD(&devices); |
|
630 |
||
631 |
/* |
|
632 |
* Locate mount point |
|
633 |
*/ |
|
634 |
if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) |
|
635 |
cmn_err(CE_PANIC, "Cannot find /devices\n"); |
|
636 |
||
637 |
/* |
|
638 |
* Perform the mount of /devices |
|
639 |
*/ |
|
640 |
if (VFS_MOUNT(&devices, mvp, &mounta, CRED())) |
|
641 |
cmn_err(CE_PANIC, "Cannot mount /devices\n"); |
|
642 |
||
643 |
RUNLOCK_VFSSW(); |
|
644 |
||
645 |
/* |
|
646 |
* Set appropriate members and add to vfs list for mnttab display |
|
647 |
*/ |
|
648 |
vfs_setresource(&devices, "/devices"); |
|
649 |
vfs_setmntpoint(&devices, "/devices"); |
|
650 |
||
651 |
/* |
|
652 |
* Hold the root of /devices so it won't go away |
|
653 |
*/ |
|
654 |
if (VFS_ROOT(&devices, &devicesdir)) |
|
655 |
cmn_err(CE_PANIC, "vfs_mountdevices: not devices root"); |
|
656 |
||
657 |
if (vfs_lock(&devices) != 0) { |
|
2621 | 658 |
VN_RELE(devicesdir); |
0 | 659 |
cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices"); |
660 |
return; |
|
661 |
} |
|
662 |
||
663 |
if (vn_vfswlock(mvp) != 0) { |
|
664 |
vfs_unlock(&devices); |
|
2621 | 665 |
VN_RELE(devicesdir); |
0 | 666 |
cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices"); |
667 |
return; |
|
668 |
} |
|
669 |
||
670 |
vfs_add(mvp, &devices, 0); |
|
671 |
vn_vfsunlock(mvp); |
|
672 |
vfs_unlock(&devices); |
|
2621 | 673 |
VN_RELE(devicesdir); |
674 |
} |
|
675 |
||
676 |
/* |
|
677 |
* mount the first instance of /dev to root and remain mounted |
|
678 |
*/ |
|
679 |
static void |
|
680 |
vfs_mountdev1(void) |
|
681 |
{ |
|
682 |
struct vfssw *vsw; |
|
683 |
struct vnode *mvp; |
|
684 |
struct mounta mounta = { /* fake mounta for sdev_mount() */ |
|
685 |
NULL, |
|
686 |
NULL, |
|
687 |
MS_SYSSPACE | MS_OVERLAY, |
|
688 |
NULL, |
|
689 |
NULL, |
|
690 |
0, |
|
691 |
NULL, |
|
692 |
0 |
|
693 |
}; |
|
694 |
||
695 |
/* |
|
696 |
* _init dev module to fill in the vfssw |
|
697 |
*/ |
|
698 |
if (modload("fs", "dev") == -1) |
|
699 |
cmn_err(CE_PANIC, "Cannot _init dev module\n"); |
|
700 |
||
701 |
/* |
|
702 |
* Hold vfs |
|
703 |
*/ |
|
704 |
RLOCK_VFSSW(); |
|
705 |
vsw = vfs_getvfsswbyname("dev"); |
|
706 |
VFS_INIT(&dev, &vsw->vsw_vfsops, NULL); |
|
707 |
VFS_HOLD(&dev); |
|
708 |
||
709 |
/* |
|
710 |
* Locate mount point |
|
711 |
*/ |
|
712 |
if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) |
|
713 |
cmn_err(CE_PANIC, "Cannot find /dev\n"); |
|
714 |
||
715 |
/* |
|
716 |
* Perform the mount of /dev |
|
717 |
*/ |
|
718 |
if (VFS_MOUNT(&dev, mvp, &mounta, CRED())) |
|
719 |
cmn_err(CE_PANIC, "Cannot mount /dev 1\n"); |
|
720 |
||
721 |
RUNLOCK_VFSSW(); |
|
722 |
||
723 |
/* |
|
724 |
* Set appropriate members and add to vfs list for mnttab display |
|
725 |
*/ |
|
726 |
vfs_setresource(&dev, "/dev"); |
|
727 |
vfs_setmntpoint(&dev, "/dev"); |
|
728 |
||
729 |
/* |
|
730 |
* Hold the root of /dev so it won't go away |
|
731 |
*/ |
|
732 |
if (VFS_ROOT(&dev, &devdir)) |
|
733 |
cmn_err(CE_PANIC, "vfs_mountdev1: not dev root"); |
|
734 |
||
735 |
if (vfs_lock(&dev) != 0) { |
|
736 |
VN_RELE(devdir); |
|
737 |
cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev"); |
|
738 |
return; |
|
739 |
} |
|
740 |
||
741 |
if (vn_vfswlock(mvp) != 0) { |
|
742 |
vfs_unlock(&dev); |
|
743 |
VN_RELE(devdir); |
|
744 |
cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev"); |
|
745 |
return; |
|
746 |
} |
|
747 |
||
748 |
vfs_add(mvp, &dev, 0); |
|
749 |
vn_vfsunlock(mvp); |
|
750 |
vfs_unlock(&dev); |
|
751 |
VN_RELE(devdir); |
|
0 | 752 |
} |
753 |
||
754 |
/* |
|
755 |
* Mount required filesystem. This is done right after root is mounted. |
|
756 |
*/ |
|
757 |
static void |
|
758 |
vfs_mountfs(char *module, char *spec, char *path) |
|
759 |
{ |
|
760 |
struct vnode *mvp; |
|
761 |
struct mounta mounta; |
|
762 |
vfs_t *vfsp; |
|
763 |
||
764 |
mounta.flags = MS_SYSSPACE | MS_DATA; |
|
765 |
mounta.fstype = module; |
|
766 |
mounta.spec = spec; |
|
767 |
mounta.dir = path; |
|
768 |
if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) { |
|
769 |
cmn_err(CE_WARN, "Cannot find %s\n", path); |
|
770 |
return; |
|
771 |
} |
|
772 |
if (domount(NULL, &mounta, mvp, CRED(), &vfsp)) |
|
773 |
cmn_err(CE_WARN, "Cannot mount %s\n", path); |
|
774 |
else |
|
775 |
VFS_RELE(vfsp); |
|
776 |
VN_RELE(mvp); |
|
777 |
} |
|
778 |
||
779 |
/* |
|
780 |
* vfs_mountroot is called by main() to mount the root filesystem. |
|
781 |
*/ |
|
782 |
void |
|
783 |
vfs_mountroot(void) |
|
784 |
{ |
|
785 |
struct vnode *rvp = NULL; |
|
786 |
char *path; |
|
787 |
size_t plen; |
|
1488 | 788 |
struct vfssw *vswp; |
0 | 789 |
|
790 |
rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL); |
|
791 |
rw_init(&vfslist, NULL, RW_DEFAULT, NULL); |
|
792 |
||
793 |
/* |
|
794 |
* Alloc the vfs hash bucket array and locks |
|
795 |
*/ |
|
796 |
rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP); |
|
797 |
||
798 |
/* |
|
799 |
* Call machine-dependent routine "rootconf" to choose a root |
|
800 |
* file system type. |
|
801 |
*/ |
|
802 |
if (rootconf()) |
|
803 |
cmn_err(CE_PANIC, "vfs_mountroot: cannot mount root"); |
|
804 |
/* |
|
805 |
* Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir |
|
806 |
* to point to it. These are used by lookuppn() so that it |
|
807 |
* knows where to start from ('/' or '.'). |
|
808 |
*/ |
|
809 |
vfs_setmntpoint(rootvfs, "/"); |
|
810 |
if (VFS_ROOT(rootvfs, &rootdir)) |
|
811 |
cmn_err(CE_PANIC, "vfs_mountroot: no root vnode"); |
|
812 |
u.u_cdir = rootdir; |
|
813 |
VN_HOLD(u.u_cdir); |
|
814 |
u.u_rdir = NULL; |
|
815 |
||
816 |
/* |
|
817 |
* Setup the global zone's rootvp, now that it exists. |
|
818 |
*/ |
|
819 |
global_zone->zone_rootvp = rootdir; |
|
820 |
VN_HOLD(global_zone->zone_rootvp); |
|
821 |
||
822 |
/* |
|
823 |
* Notify the module code that it can begin using the |
|
824 |
* root filesystem instead of the boot program's services. |
|
825 |
*/ |
|
826 |
modrootloaded = 1; |
|
827 |
/* |
|
828 |
* Set up mnttab information for root |
|
829 |
*/ |
|
830 |
vfs_setresource(rootvfs, rootfs.bo_name); |
|
831 |
||
832 |
/* |
|
833 |
* Notify cluster software that the root filesystem is available. |
|
834 |
*/ |
|
835 |
clboot_mountroot(); |
|
836 |
||
1488 | 837 |
/* Now that we're all done with the root FS, set up its vopstats */ |
838 |
if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) { |
|
839 |
/* Set flag for statistics collection */ |
|
840 |
if (vswp->vsw_flag & VSW_STATS) { |
|
1520 | 841 |
initialize_vopstats(&rootvfs->vfs_vopstats); |
1488 | 842 |
rootvfs->vfs_flag |= VFS_STATS; |
1520 | 843 |
rootvfs->vfs_fstypevsp = |
844 |
get_fstype_vopstats(rootvfs, vswp); |
|
845 |
rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs); |
|
1488 | 846 |
} |
847 |
vfs_unrefvfssw(vswp); |
|
848 |
} |
|
849 |
||
0 | 850 |
/* |
2621 | 851 |
* Mount /devices, /dev instance 1, /system/contract, /etc/mnttab, |
852 |
* /etc/svc/volatile, /system/object, and /proc. |
|
0 | 853 |
*/ |
854 |
vfs_mountdevices(); |
|
2621 | 855 |
vfs_mountdev1(); |
0 | 856 |
|
857 |
vfs_mountfs("ctfs", "ctfs", CTFS_ROOT); |
|
858 |
vfs_mountfs("proc", "/proc", "/proc"); |
|
859 |
vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab"); |
|
860 |
vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile"); |
|
861 |
vfs_mountfs("objfs", "objfs", OBJFS_ROOT); |
|
862 |
||
863 |
#ifdef __sparc |
|
864 |
/* |
|
865 |
* This bit of magic can go away when we convert sparc to |
|
866 |
* the new boot architecture based on ramdisk. |
|
867 |
* |
|
868 |
* Booting off a mirrored root volume: |
|
869 |
* At this point, we have booted and mounted root on a |
|
870 |
* single component of the mirror. Complete the boot |
|
871 |
* by configuring SVM and converting the root to the |
|
872 |
* dev_t of the mirrored root device. This dev_t conversion |
|
873 |
* only works because the underlying device doesn't change. |
|
874 |
*/ |
|
875 |
if (root_is_svm) { |
|
876 |
if (svm_rootconf()) { |
|
877 |
cmn_err(CE_PANIC, "vfs_mountroot: cannot remount root"); |
|
878 |
} |
|
879 |
||
880 |
/* |
|
881 |
* mnttab should reflect the new root device |
|
882 |
*/ |
|
883 |
vfs_lock_wait(rootvfs); |
|
884 |
vfs_setresource(rootvfs, rootfs.bo_name); |
|
885 |
vfs_unlock(rootvfs); |
|
886 |
} |
|
887 |
#endif /* __sparc */ |
|
888 |
||
889 |
/* |
|
890 |
* Look up the root device via devfs so that a dv_node is |
|
891 |
* created for it. The vnode is never VN_RELE()ed. |
|
892 |
* We allocate more than MAXPATHLEN so that the |
|
893 |
* buffer passed to i_ddi_prompath_to_devfspath() is |
|
894 |
* exactly MAXPATHLEN (the function expects a buffer |
|
895 |
* of that length). |
|
896 |
*/ |
|
897 |
plen = strlen("/devices"); |
|
898 |
path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); |
|
899 |
(void) strcpy(path, "/devices"); |
|
900 |
||
901 |
if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) |
|
902 |
!= DDI_SUCCESS || |
|
903 |
lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { |
|
904 |
||
905 |
/* NUL terminate in case "path" has garbage */ |
|
906 |
path[plen + MAXPATHLEN - 1] = '\0'; |
|
907 |
#ifdef DEBUG |
|
908 |
cmn_err(CE_WARN, "!Cannot lookup root device: %s", path); |
|
909 |
#endif |
|
910 |
} |
|
911 |
kmem_free(path, plen + MAXPATHLEN); |
|
912 |
} |
|
913 |
||
914 |
/* |
|
994
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
915 |
* If remount failed and we're in a zone we need to check for the zone |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
916 |
* root path and strip it before the call to vfs_setpath(). |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
917 |
* |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
918 |
* If strpath doesn't begin with the zone_rootpath the original |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
919 |
* strpath is returned unchanged. |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
920 |
*/ |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
921 |
static const char * |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
922 |
stripzonepath(const char *strpath) |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
923 |
{ |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
924 |
char *str1, *str2; |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
925 |
int i; |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
926 |
zone_t *zonep = curproc->p_zone; |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
927 |
|
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
928 |
if (zonep->zone_rootpath == NULL || strpath == NULL) { |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
929 |
return (NULL); |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
930 |
} |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
931 |
|
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
932 |
/* |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
933 |
* we check for the end of the string at one past the |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
934 |
* current position because the zone_rootpath always |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
935 |
* ends with "/" but we don't want to strip that off. |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
936 |
*/ |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
937 |
str1 = zonep->zone_rootpath; |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
938 |
str2 = (char *)strpath; |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
939 |
ASSERT(str1[0] != '\0'); |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
940 |
for (i = 0; str1[i + 1] != '\0'; i++) { |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
941 |
if (str1[i] != str2[i]) |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
942 |
return ((char *)strpath); |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
943 |
} |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
944 |
return (&str2[i]); |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
945 |
} |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
946 |
|
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
947 |
/* |
0 | 948 |
* Common mount code. Called from the system call entry point, from autofs, |
949 |
* and from pxfs. |
|
950 |
* |
|
951 |
* Takes the effective file system type, mount arguments, the mount point |
|
952 |
* vnode, flags specifying whether the mount is a remount and whether it |
|
953 |
* should be entered into the vfs list, and credentials. Fills in its vfspp |
|
954 |
* parameter with the mounted file system instance's vfs. |
|
955 |
* |
|
956 |
* Note that the effective file system type is specified as a string. It may |
|
957 |
* be null, in which case it's determined from the mount arguments, and may |
|
958 |
* differ from the type specified in the mount arguments; this is a hook to |
|
959 |
* allow interposition when instantiating file system instances. |
|
960 |
* |
|
961 |
* The caller is responsible for releasing its own hold on the mount point |
|
962 |
* vp (this routine does its own hold when necessary). |
|
963 |
* Also note that for remounts, the mount point vp should be the vnode for |
|
964 |
* the root of the file system rather than the vnode that the file system |
|
965 |
* is mounted on top of. |
|
966 |
*/ |
|
967 |
int |
|
968 |
domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, |
|
969 |
struct vfs **vfspp) |
|
970 |
{ |
|
971 |
struct vfssw *vswp; |
|
972 |
vfsops_t *vfsops; |
|
973 |
struct vfs *vfsp; |
|
974 |
struct vnode *bvp; |
|
975 |
dev_t bdev = 0; |
|
976 |
mntopts_t mnt_mntopts; |
|
977 |
int error = 0; |
|
978 |
int copyout_error = 0; |
|
979 |
int ovflags; |
|
980 |
char *opts = uap->optptr; |
|
981 |
char *inargs = opts; |
|
982 |
int optlen = uap->optlen; |
|
983 |
int remount; |
|
984 |
int rdonly; |
|
985 |
int nbmand = 0; |
|
986 |
int delmip = 0; |
|
987 |
int addmip = 0; |
|
988 |
int splice = ((uap->flags & MS_NOSPLICE) == 0); |
|
989 |
int fromspace = (uap->flags & MS_SYSSPACE) ? |
|
990 |
UIO_SYSSPACE : UIO_USERSPACE; |
|
991 |
char *resource = NULL, *mountpt = NULL; |
|
992 |
refstr_t *oldresource, *oldmntpt; |
|
993 |
struct pathname pn, rpn; |
|
1520 | 994 |
vsk_anchor_t *vskap; |
0 | 995 |
|
996 |
/* |
|
997 |
* The v_flag value for the mount point vp is permanently set |
|
998 |
* to VVFSLOCK so that no one bypasses the vn_vfs*locks routine |
|
999 |
* for mount point locking. |
|
1000 |
*/ |
|
1001 |
mutex_enter(&vp->v_lock); |
|
1002 |
vp->v_flag |= VVFSLOCK; |
|
1003 |
mutex_exit(&vp->v_lock); |
|
1004 |
||
1005 |
mnt_mntopts.mo_count = 0; |
|
1006 |
/* |
|
1007 |
* Find the ops vector to use to invoke the file system-specific mount |
|
1008 |
* method. If the fsname argument is non-NULL, use it directly. |
|
1009 |
* Otherwise, dig the file system type information out of the mount |
|
1010 |
* arguments. |
|
1011 |
* |
|
1012 |
* A side effect is to hold the vfssw entry. |
|
1013 |
* |
|
1014 |
* Mount arguments can be specified in several ways, which are |
|
1015 |
* distinguished by flag bit settings. The preferred way is to set |
|
1016 |
* MS_OPTIONSTR, indicating an 8 argument mount with the file system |
|
1017 |
* type supplied as a character string and the last two arguments |
|
1018 |
* being a pointer to a character buffer and the size of the buffer. |
|
1019 |
* On entry, the buffer holds a null terminated list of options; on |
|
1020 |
* return, the string is the list of options the file system |
|
1021 |
* recognized. If MS_DATA is set arguments five and six point to a |
|
1022 |
* block of binary data which the file system interprets. |
|
1023 |
* A further wrinkle is that some callers don't set MS_FSS and MS_DATA |
|
1024 |
* consistently with these conventions. To handle them, we check to |
|
1025 |
* see whether the pointer to the file system name has a numeric value |
|
1026 |
* less than 256. If so, we treat it as an index. |
|
1027 |
*/ |
|
1028 |
if (fsname != NULL) { |
|
1029 |
if ((vswp = vfs_getvfssw(fsname)) == NULL) { |
|
1030 |
return (EINVAL); |
|
1031 |
} |
|
1032 |
} else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) { |
|
1033 |
size_t n; |
|
1034 |
uint_t fstype; |
|
1035 |
char name[FSTYPSZ]; |
|
1036 |
||
1037 |
if ((fstype = (uintptr_t)uap->fstype) < 256) { |
|
1038 |
RLOCK_VFSSW(); |
|
1039 |
if (fstype == 0 || fstype >= nfstype || |
|
1040 |
!ALLOCATED_VFSSW(&vfssw[fstype])) { |
|
1041 |
RUNLOCK_VFSSW(); |
|
1042 |
return (EINVAL); |
|
1043 |
} |
|
1044 |
(void) strcpy(name, vfssw[fstype].vsw_name); |
|
1045 |
RUNLOCK_VFSSW(); |
|
1046 |
if ((vswp = vfs_getvfssw(name)) == NULL) |
|
1047 |
return (EINVAL); |
|
1048 |
} else { |
|
1049 |
/* |
|
1050 |
* Handle either kernel or user address space. |
|
1051 |
*/ |
|
1052 |
if (uap->flags & MS_SYSSPACE) { |
|
1053 |
error = copystr(uap->fstype, name, |
|
1054 |
FSTYPSZ, &n); |
|
1055 |
} else { |
|
1056 |
error = copyinstr(uap->fstype, name, |
|
1057 |
FSTYPSZ, &n); |
|
1058 |
} |
|
1059 |
if (error) { |
|
1060 |
if (error == ENAMETOOLONG) |
|
1061 |
return (EINVAL); |
|
1062 |
return (error); |
|
1063 |
} |
|
1064 |
if ((vswp = vfs_getvfssw(name)) == NULL) |
|
1065 |
return (EINVAL); |
|
1066 |
} |
|
1067 |
} else { |
|
1068 |
if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) |
|
1069 |
return (EINVAL); |
|
1070 |
} |
|
1071 |
if (!VFS_INSTALLED(vswp)) |
|
1072 |
return (EINVAL); |
|
1073 |
vfsops = &vswp->vsw_vfsops; |
|
1074 |
||
1075 |
vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); |
|
1076 |
/* |
|
1077 |
* Fetch mount options and parse them for generic vfs options |
|
1078 |
*/ |
|
1079 |
if (uap->flags & MS_OPTIONSTR) { |
|
1080 |
/* |
|
1081 |
* Limit the buffer size |
|
1082 |
*/ |
|
1083 |
if (optlen < 0 || optlen > MAX_MNTOPT_STR) { |
|
1084 |
error = EINVAL; |
|
1085 |
goto errout; |
|
1086 |
} |
|
1087 |
if ((uap->flags & MS_SYSSPACE) == 0) { |
|
1088 |
inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); |
|
1089 |
inargs[0] = '\0'; |
|
1090 |
if (optlen) { |
|
1091 |
error = copyinstr(opts, inargs, (size_t)optlen, |
|
1092 |
NULL); |
|
1093 |
if (error) { |
|
1094 |
goto errout; |
|
1095 |
} |
|
1096 |
} |
|
1097 |
} |
|
1098 |
vfs_parsemntopts(&mnt_mntopts, inargs, 0); |
|
1099 |
} |
|
1100 |
/* |
|
1101 |
* Flag bits override the options string. |
|
1102 |
*/ |
|
1103 |
if (uap->flags & MS_REMOUNT) |
|
1104 |
vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0); |
|
1105 |
if (uap->flags & MS_RDONLY) |
|
1106 |
vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0); |
|
1107 |
if (uap->flags & MS_NOSUID) |
|
1108 |
vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); |
|
1109 |
||
1110 |
/* |
|
1111 |
* Check if this is a remount; must be set in the option string and |
|
1112 |
* the file system must support a remount option. |
|
1113 |
*/ |
|
1114 |
if (remount = vfs_optionisset_nolock(&mnt_mntopts, |
|
1115 |
MNTOPT_REMOUNT, NULL)) { |
|
1116 |
if (!(vswp->vsw_flag & VSW_CANREMOUNT)) { |
|
1117 |
error = ENOTSUP; |
|
1118 |
goto errout; |
|
1119 |
} |
|
1120 |
uap->flags |= MS_REMOUNT; |
|
1121 |
} |
|
1122 |
||
1123 |
/* |
|
1124 |
* uap->flags and vfs_optionisset() should agree. |
|
1125 |
*/ |
|
1126 |
if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) { |
|
1127 |
uap->flags |= MS_RDONLY; |
|
1128 |
} |
|
1129 |
if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) { |
|
1130 |
uap->flags |= MS_NOSUID; |
|
1131 |
} |
|
1132 |
nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL); |
|
1133 |
ASSERT(splice || !remount); |
|
1134 |
/* |
|
1135 |
* If we are splicing the fs into the namespace, |
|
1136 |
* perform mount point checks. |
|
1137 |
* |
|
1138 |
* We want to resolve the path for the mount point to eliminate |
|
1139 |
* '.' and ".." and symlinks in mount points; we can't do the |
|
1140 |
* same for the resource string, since it would turn |
|
1141 |
* "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do |
|
1142 |
* this before grabbing vn_vfswlock(), because otherwise we |
|
1143 |
* would deadlock with lookuppn(). |
|
1144 |
*/ |
|
1145 |
if (splice) { |
|
1146 |
ASSERT(vp->v_count > 0); |
|
1147 |
||
1148 |
/* |
|
1149 |
* Pick up mount point and device from appropriate space. |
|
1150 |
*/ |
|
1151 |
if (pn_get(uap->spec, fromspace, &pn) == 0) { |
|
1152 |
resource = kmem_alloc(pn.pn_pathlen + 1, |
|
1153 |
KM_SLEEP); |
|
1154 |
(void) strcpy(resource, pn.pn_path); |
|
1155 |
pn_free(&pn); |
|
1156 |
} |
|
1157 |
/* |
|
1158 |
* Do a lookupname prior to taking the |
|
1159 |
* writelock. Mark this as completed if |
|
1160 |
* successful for later cleanup and addition to |
|
1161 |
* the mount in progress table. |
|
1162 |
*/ |
|
1163 |
if ((uap->flags & MS_GLOBAL) == 0 && |
|
1164 |
lookupname(uap->spec, fromspace, |
|
1165 |
FOLLOW, NULL, &bvp) == 0) { |
|
1166 |
addmip = 1; |
|
1167 |
} |
|
1168 |
||
1169 |
if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) { |
|
1170 |
pathname_t *pnp; |
|
1171 |
||
1172 |
if (*pn.pn_path != '/') { |
|
1173 |
error = EINVAL; |
|
1174 |
pn_free(&pn); |
|
1175 |
goto errout; |
|
1176 |
} |
|
1177 |
pn_alloc(&rpn); |
|
1178 |
/* |
|
1179 |
* Kludge to prevent autofs from deadlocking with |
|
1180 |
* itself when it calls domount(). |
|
1181 |
* |
|
1182 |
* If autofs is calling, it is because it is doing |
|
1183 |
* (autofs) mounts in the process of an NFS mount. A |
|
1184 |
* lookuppn() here would cause us to block waiting for |
|
1185 |
* said NFS mount to complete, which can't since this |
|
1186 |
* is the thread that was supposed to doing it. |
|
1187 |
*/ |
|
1188 |
if (fromspace == UIO_USERSPACE) { |
|
1189 |
if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL, |
|
1190 |
NULL)) == 0) { |
|
1191 |
pnp = &rpn; |
|
1192 |
} else { |
|
1193 |
/* |
|
1194 |
* The file disappeared or otherwise |
|
1195 |
* became inaccessible since we opened |
|
1196 |
* it; might as well fail the mount |
|
1197 |
* since the mount point is no longer |
|
1198 |
* accessible. |
|
1199 |
*/ |
|
1200 |
pn_free(&rpn); |
|
1201 |
pn_free(&pn); |
|
1202 |
goto errout; |
|
1203 |
} |
|
1204 |
} else { |
|
1205 |
pnp = &pn; |
|
1206 |
} |
|
1207 |
mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP); |
|
1208 |
(void) strcpy(mountpt, pnp->pn_path); |
|
1209 |
||
1210 |
/* |
|
1211 |
* If the addition of the zone's rootpath |
|
1212 |
* would push us over a total path length |
|
1213 |
* of MAXPATHLEN, we fail the mount with |
|
1214 |
* ENAMETOOLONG, which is what we would have |
|
1215 |
* gotten if we were trying to perform the same |
|
1216 |
* mount in the global zone. |
|
1217 |
* |
|
1218 |
* strlen() doesn't count the trailing |
|
1219 |
* '\0', but zone_rootpathlen counts both a |
|
1220 |
* trailing '/' and the terminating '\0'. |
|
1221 |
*/ |
|
1222 |
if ((curproc->p_zone->zone_rootpathlen - 1 + |
|
1223 |
strlen(mountpt)) > MAXPATHLEN || |
|
1224 |
(resource != NULL && |
|
1225 |
(curproc->p_zone->zone_rootpathlen - 1 + |
|
1226 |
strlen(resource)) > MAXPATHLEN)) { |
|
1227 |
error = ENAMETOOLONG; |
|
1228 |
} |
|
1229 |
||
1230 |
pn_free(&rpn); |
|
1231 |
pn_free(&pn); |
|
1232 |
} |
|
1233 |
||
1234 |
if (error) |
|
1235 |
goto errout; |
|
1236 |
||
1237 |
/* |
|
1238 |
* Prevent path name resolution from proceeding past |
|
1239 |
* the mount point. |
|
1240 |
*/ |
|
1241 |
if (vn_vfswlock(vp) != 0) { |
|
1242 |
error = EBUSY; |
|
1243 |
goto errout; |
|
1244 |
} |
|
1245 |
||
1246 |
/* |
|
1247 |
* Verify that it's legitimate to establish a mount on |
|
1248 |
* the prospective mount point. |
|
1249 |
*/ |
|
1250 |
if (vn_mountedvfs(vp) != NULL) { |
|
1251 |
/* |
|
1252 |
* The mount point lock was obtained after some |
|
1253 |
* other thread raced through and established a mount. |
|
1254 |
*/ |
|
1255 |
vn_vfsunlock(vp); |
|
1256 |
error = EBUSY; |
|
1257 |
goto errout; |
|
1258 |
} |
|
1259 |
if (vp->v_flag & VNOMOUNT) { |
|
1260 |
vn_vfsunlock(vp); |
|
1261 |
error = EINVAL; |
|
1262 |
goto errout; |
|
1263 |
} |
|
1264 |
} |
|
1265 |
if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) { |
|
1266 |
uap->dataptr = NULL; |
|
1267 |
uap->datalen = 0; |
|
1268 |
} |
|
1269 |
||
1270 |
/* |
|
1271 |
* If this is a remount, we don't want to create a new VFS. |
|
1272 |
* Instead, we pass the existing one with a remount flag. |
|
1273 |
*/ |
|
1274 |
if (remount) { |
|
1275 |
/* |
|
1276 |
* Confirm that the mount point is the root vnode of the |
|
1277 |
* file system that is being remounted. |
|
1278 |
* This can happen if the user specifies a different |
|
1279 |
* mount point directory pathname in the (re)mount command. |
|
1280 |
* |
|
1281 |
* Code below can only be reached if splice is true, so it's |
|
1282 |
* safe to do vn_vfsunlock() here. |
|
1283 |
*/ |
|
1284 |
if ((vp->v_flag & VROOT) == 0) { |
|
1285 |
vn_vfsunlock(vp); |
|
1286 |
error = ENOENT; |
|
1287 |
goto errout; |
|
1288 |
} |
|
1289 |
/* |
|
1290 |
* Disallow making file systems read-only unless file system |
|
1291 |
* explicitly allows it in its vfssw. Ignore other flags. |
|
1292 |
*/ |
|
1293 |
if (rdonly && vn_is_readonly(vp) == 0 && |
|
1294 |
(vswp->vsw_flag & VSW_CANRWRO) == 0) { |
|
1295 |
vn_vfsunlock(vp); |
|
1296 |
error = EINVAL; |
|
1297 |
goto errout; |
|
1298 |
} |
|
1299 |
/* |
|
1300 |
* Changing the NBMAND setting on remounts is permitted |
|
1301 |
* but logged since it can lead to unexpected behavior. |
|
1302 |
* We also counsel against using it for / and /usr. |
|
1303 |
*/ |
|
1304 |
if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) || |
|
1305 |
(!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) { |
|
1306 |
cmn_err(CE_WARN, "domount: nbmand turned %s via " |
|
1307 |
"remounting %s", nbmand ? "on" : "off", |
|
1308 |
refstr_value(vp->v_vfsp->vfs_mntpt)); |
|
1309 |
} |
|
1310 |
vfsp = vp->v_vfsp; |
|
1311 |
ovflags = vfsp->vfs_flag; |
|
1312 |
vfsp->vfs_flag |= VFS_REMOUNT; |
|
1313 |
vfsp->vfs_flag &= ~VFS_RDONLY; |
|
1314 |
} else { |
|
1315 |
vfsp = kmem_alloc(sizeof (vfs_t), KM_SLEEP); |
|
1316 |
VFS_INIT(vfsp, vfsops, NULL); |
|
1317 |
} |
|
1318 |
||
1319 |
VFS_HOLD(vfsp); |
|
1320 |
||
1321 |
/* |
|
1322 |
* The vfs_reflock is not used anymore the code below explicitly |
|
1323 |
* holds it preventing others accesing it directly. |
|
1324 |
*/ |
|
1325 |
if ((sema_tryp(&vfsp->vfs_reflock) == 0) && |
|
1326 |
!(vfsp->vfs_flag & VFS_REMOUNT)) |
|
1327 |
cmn_err(CE_WARN, |
|
1328 |
"mount type %s couldn't get vfs_reflock\n", vswp->vsw_name); |
|
1329 |
||
1330 |
/* |
|
1331 |
* Lock the vfs. If this is a remount we want to avoid spurious umount |
|
1332 |
* failures that happen as a side-effect of fsflush() and other mount |
|
1333 |
* and unmount operations that might be going on simultaneously and |
|
1334 |
* may have locked the vfs currently. To not return EBUSY immediately |
|
1335 |
* here we use vfs_lock_wait() instead vfs_lock() for the remount case. |
|
1336 |
*/ |
|
1337 |
if (!remount) { |
|
1338 |
if (error = vfs_lock(vfsp)) { |
|
1339 |
vfsp->vfs_flag = ovflags; |
|
1340 |
if (splice) |
|
1341 |
vn_vfsunlock(vp); |
|
1925 | 1342 |
if (vfsp->vfs_implp) |
1343 |
vfsimpl_teardown(vfsp); |
|
0 | 1344 |
kmem_free(vfsp, sizeof (struct vfs)); |
1345 |
goto errout; |
|
1346 |
} |
|
1347 |
} else { |
|
1348 |
vfs_lock_wait(vfsp); |
|
1349 |
} |
|
1350 |
||
1351 |
/* |
|
1352 |
* Add device to mount in progress table, global mounts require special |
|
1353 |
* handling. It is possible that we have already done the lookupname |
|
1354 |
* on a spliced, non-global fs. If so, we don't want to do it again |
|
1355 |
* since we cannot do a lookupname after taking the |
|
1356 |
* wlock above. This case is for a non-spliced, non-global filesystem. |
|
1357 |
*/ |
|
1358 |
if (!addmip) { |
|
1359 |
if ((uap->flags & MS_GLOBAL) == 0 && |
|
1360 |
lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) { |
|
1361 |
addmip = 1; |
|
1362 |
} |
|
1363 |
} |
|
1364 |
||
1365 |
if (addmip) { |
|
1366 |
bdev = bvp->v_rdev; |
|
1367 |
VN_RELE(bvp); |
|
1368 |
vfs_addmip(bdev, vfsp); |
|
1369 |
addmip = 0; |
|
1370 |
delmip = 1; |
|
1371 |
} |
|
1372 |
/* |
|
1373 |
* Invalidate cached entry for the mount point. |
|
1374 |
*/ |
|
1375 |
if (splice) |
|
1376 |
dnlc_purge_vp(vp); |
|
1377 |
||
1378 |
/* |
|
1379 |
* If have an option string but the filesystem doesn't supply a |
|
1380 |
* prototype options table, create a table with the global |
|
1381 |
* options and sufficient room to accept all the options in the |
|
1382 |
* string. Then parse the passed in option string |
|
1383 |
* accepting all the options in the string. This gives us an |
|
1384 |
* option table with all the proper cancel properties for the |
|
1385 |
* global options. |
|
1386 |
* |
|
1387 |
* Filesystems that supply a prototype options table are handled |
|
1388 |
* earlier in this function. |
|
1389 |
*/ |
|
1390 |
if (uap->flags & MS_OPTIONSTR) { |
|
1391 |
if (!(vswp->vsw_flag & VSW_HASPROTO)) { |
|
1392 |
mntopts_t tmp_mntopts; |
|
1393 |
||
1394 |
tmp_mntopts.mo_count = 0; |
|
1395 |
vfs_createopttbl_extend(&tmp_mntopts, inargs, |
|
1396 |
&mnt_mntopts); |
|
1397 |
vfs_parsemntopts(&tmp_mntopts, inargs, 1); |
|
1398 |
vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts); |
|
1399 |
vfs_freeopttbl(&tmp_mntopts); |
|
1400 |
} |
|
1401 |
} |
|
1402 |
||
1403 |
/* |
|
1404 |
* Serialize with zone creations. |
|
1405 |
*/ |
|
1406 |
mount_in_progress(); |
|
1407 |
/* |
|
1408 |
* Instantiate (or reinstantiate) the file system. If appropriate, |
|
1409 |
* splice it into the file system name space. |
|
1410 |
* |
|
1411 |
* We want VFS_MOUNT() to be able to override the vfs_resource |
|
1412 |
* string if necessary (ie, mntfs), and also for a remount to |
|
1413 |
* change the same (necessary when remounting '/' during boot). |
|
1414 |
* So we set up vfs_mntpt and vfs_resource to what we think they |
|
1415 |
* should be, then hand off control to VFS_MOUNT() which can |
|
1416 |
* override this. |
|
1417 |
* |
|
1418 |
* For safety's sake, when changing vfs_resource or vfs_mntpt of |
|
1419 |
* a vfs which is on the vfs list (i.e. during a remount), we must |
|
1420 |
* never set those fields to NULL. Several bits of code make |
|
1421 |
* assumptions that the fields are always valid. |
|
1422 |
*/ |
|
1423 |
vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); |
|
1424 |
if (remount) { |
|
1425 |
if ((oldresource = vfsp->vfs_resource) != NULL) |
|
1426 |
refstr_hold(oldresource); |
|
1427 |
if ((oldmntpt = vfsp->vfs_mntpt) != NULL) |
|
1428 |
refstr_hold(oldmntpt); |
|
1429 |
} |
|
1430 |
vfs_setresource(vfsp, resource); |
|
1431 |
vfs_setmntpoint(vfsp, mountpt); |
|
1432 |
||
1433 |
error = VFS_MOUNT(vfsp, vp, uap, credp); |
|
1434 |
||
1435 |
if (uap->flags & MS_RDONLY) |
|
1436 |
vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); |
|
1437 |
if (uap->flags & MS_NOSUID) |
|
1438 |
vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0); |
|
1439 |
if (uap->flags & MS_GLOBAL) |
|
1440 |
vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0); |
|
1441 |
||
1442 |
if (error) { |
|
1443 |
if (remount) { |
|
1444 |
/* put back pre-remount options */ |
|
1445 |
vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); |
|
994
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
1446 |
vfs_setmntpoint(vfsp, (stripzonepath( |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
1447 |
refstr_value(oldmntpt)))); |
0 | 1448 |
if (oldmntpt) |
1449 |
refstr_rele(oldmntpt); |
|
994
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
1450 |
vfs_setresource(vfsp, (stripzonepath( |
83b8875ae3f3
6256783 /etc/mnttab gets screwed-up when an unprivileged user tries to remount in a local zone
evanl
parents:
254
diff
changeset
|
1451 |
refstr_value(oldresource)))); |
0 | 1452 |
if (oldresource) |
1453 |
refstr_rele(oldresource); |
|
1454 |
vfsp->vfs_flag = ovflags; |
|
1455 |
vfs_unlock(vfsp); |
|
1456 |
VFS_RELE(vfsp); |
|
1457 |
} else { |
|
1458 |
vfs_unlock(vfsp); |
|
1459 |
vfs_freemnttab(vfsp); |
|
1925 | 1460 |
if (vfsp->vfs_implp) |
1461 |
vfsimpl_teardown(vfsp); |
|
0 | 1462 |
kmem_free(vfsp, sizeof (struct vfs)); |
1463 |
} |
|
1464 |
} else { |
|
1465 |
/* |
|
1466 |
* Set the mount time to now |
|
1467 |
*/ |
|
1468 |
vfsp->vfs_mtime = ddi_get_time(); |
|
1469 |
if (remount) { |
|
1470 |
vfsp->vfs_flag &= ~VFS_REMOUNT; |
|
1471 |
if (oldresource) |
|
1472 |
refstr_rele(oldresource); |
|
1473 |
if (oldmntpt) |
|
1474 |
refstr_rele(oldmntpt); |
|
1475 |
} else if (splice) { |
|
1476 |
/* |
|
1477 |
* Link vfsp into the name space at the mount |
|
1478 |
* point. Vfs_add() is responsible for |
|
1479 |
* holding the mount point which will be |
|
1480 |
* released when vfs_remove() is called. |
|
1481 |
*/ |
|
1482 |
vfs_add(vp, vfsp, uap->flags); |
|
1483 |
} else { |
|
1484 |
/* |
|
1485 |
* Hold the reference to file system which is |
|
1486 |
* not linked into the name space. |
|
1487 |
*/ |
|
1488 |
vfsp->vfs_zone = NULL; |
|
1489 |
VFS_HOLD(vfsp); |
|
1490 |
vfsp->vfs_vnodecovered = NULL; |
|
1491 |
} |
|
1492 |
/* |
|
1493 |
* Set flags for global options encountered |
|
1494 |
*/ |
|
1495 |
if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) |
|
1496 |
vfsp->vfs_flag |= VFS_RDONLY; |
|
1497 |
else |
|
1498 |
vfsp->vfs_flag &= ~VFS_RDONLY; |
|
1499 |
if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { |
|
1500 |
vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES); |
|
1501 |
} else { |
|
1502 |
if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) |
|
1503 |
vfsp->vfs_flag |= VFS_NODEVICES; |
|
1504 |
else |
|
1505 |
vfsp->vfs_flag &= ~VFS_NODEVICES; |
|
1506 |
if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) |
|
1507 |
vfsp->vfs_flag |= VFS_NOSETUID; |
|
1508 |
else |
|
1509 |
vfsp->vfs_flag &= ~VFS_NOSETUID; |
|
1510 |
} |
|
1511 |
if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) |
|
1512 |
vfsp->vfs_flag |= VFS_NBMAND; |
|
1513 |
else |
|
1514 |
vfsp->vfs_flag &= ~VFS_NBMAND; |
|
1515 |
||
1516 |
if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) |
|
1517 |
vfsp->vfs_flag |= VFS_XATTR; |
|
1518 |
else |
|
1519 |
vfsp->vfs_flag &= ~VFS_XATTR; |
|
1520 |
||
1521 |
if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) |
|
1522 |
vfsp->vfs_flag |= VFS_NOEXEC; |
|
1523 |
else |
|
1524 |
vfsp->vfs_flag &= ~VFS_NOEXEC; |
|
1525 |
||
1526 |
/* |
|
1527 |
* Now construct the output option string of options |
|
1528 |
* we recognized. |
|
1529 |
*/ |
|
1530 |
if (uap->flags & MS_OPTIONSTR) { |
|
1531 |
vfs_list_read_lock(); |
|
1532 |
copyout_error = vfs_buildoptionstr( |
|
1533 |
&vfsp->vfs_mntopts, inargs, optlen); |
|
1534 |
vfs_list_unlock(); |
|
1535 |
if (copyout_error == 0 && |
|
1536 |
(uap->flags & MS_SYSSPACE) == 0) { |
|
1537 |
copyout_error = copyoutstr(inargs, opts, |
|
1538 |
optlen, NULL); |
|
1539 |
} |
|
1540 |
} |
|
1488 | 1541 |
|
1520 | 1542 |
/* |
1543 |
* If this isn't a remount, set up the vopstats before |
|
1678
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1544 |
* anyone can touch this. We only allow spliced file |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1545 |
* systems (file systems which are in the namespace) to |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1546 |
* have the VFS_STATS flag set. |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1547 |
* NOTE: PxFS mounts the underlying file system with |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1548 |
* MS_NOSPLICE set and copies those vfs_flags to its private |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1549 |
* vfs structure. As a result, PxFS should never have |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1550 |
* the VFS_STATS flag or else we might access the vfs |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1551 |
* statistics-related fields prior to them being |
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1552 |
* properly initialized. |
1520 | 1553 |
*/ |
1678
cd336ddf9a1c
6397933 fs node paniced while mounting global filesystem (/global/.devices/node@x)
rsb
parents:
1520
diff
changeset
|
1554 |
if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) { |
1520 | 1555 |
initialize_vopstats(&vfsp->vfs_vopstats); |
1556 |
/* |
|
1557 |
* We need to set vfs_vskap to NULL because there's |
|
1558 |
* a chance it won't be set below. This is checked |
|
1559 |
* in teardown_vopstats() so we can't have garbage. |
|
1560 |
*/ |
|
1561 |
vfsp->vfs_vskap = NULL; |
|
1488 | 1562 |
vfsp->vfs_flag |= VFS_STATS; |
1520 | 1563 |
vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp); |
1488 | 1564 |
} |
1565 |
||
0 | 1566 |
vfs_unlock(vfsp); |
1567 |
} |
|
1568 |
mount_completed(); |
|
1569 |
if (splice) |
|
1570 |
vn_vfsunlock(vp); |
|
1571 |
||
1572 |
if ((error == 0) && (copyout_error == 0)) { |
|
1520 | 1573 |
if (!remount) { |
1574 |
/* |
|
1575 |
* Don't call get_vskstat_anchor() while holding |
|
1576 |
* locks since it allocates memory and calls |
|
1577 |
* VFS_STATVFS(). For NFS, the latter can generate |
|
1578 |
* an over-the-wire call. |
|
1579 |
*/ |
|
1580 |
vskap = get_vskstat_anchor(vfsp); |
|
1581 |
/* Only take the lock if we have something to do */ |
|
1582 |
if (vskap != NULL) { |
|
1583 |
vfs_lock_wait(vfsp); |
|
1584 |
if (vfsp->vfs_flag & VFS_STATS) { |
|
1585 |
vfsp->vfs_vskap = vskap; |
|
1586 |
} |
|
1587 |
vfs_unlock(vfsp); |
|
1588 |
} |
|
1589 |
} |
|
1488 | 1590 |
/* Return vfsp to caller. */ |
0 | 1591 |
*vfspp = vfsp; |
1592 |
} |
|
1593 |
errout: |
|
1594 |
vfs_freeopttbl(&mnt_mntopts); |
|
1595 |
if (resource != NULL) |
|
1596 |
kmem_free(resource, strlen(resource) + 1); |
|
1597 |
if (mountpt != NULL) |
|
1598 |
kmem_free(mountpt, strlen(mountpt) + 1); |
|
1599 |
/* |
|
1600 |
* It is possible we errored prior to adding to mount in progress |
|
1601 |
* table. Must free vnode we acquired with successful lookupname. |
|
1602 |
*/ |
|
1603 |
if (addmip) |
|
1604 |
VN_RELE(bvp); |
|
1605 |
if (delmip) |
|
1606 |
vfs_delmip(vfsp); |
|
1607 |
ASSERT(vswp != NULL); |
|
1608 |
vfs_unrefvfssw(vswp); |
|
1609 |
if (inargs != opts) |
|
1610 |
kmem_free(inargs, MAX_MNTOPT_STR); |
|
1611 |
if (copyout_error) { |
|
1612 |
VFS_RELE(vfsp); |
|
1613 |
error = copyout_error; |
|
1614 |
} |
|
1615 |
return (error); |
|
1616 |
} |
|
1617 |
||
1618 |
static void |
|
1619 |
vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath) |
|
1620 |
{ |
|
1621 |
size_t len; |
|
1622 |
refstr_t *ref; |
|
1623 |
zone_t *zone = curproc->p_zone; |
|
1624 |
char *sp; |
|
1625 |
int have_list_lock = 0; |
|
1626 |
||
1627 |
ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp)); |
|
1628 |
||
1629 |
/* |
|
1630 |
* New path must be less than MAXPATHLEN because mntfs |
|
1631 |
* will only display up to MAXPATHLEN bytes. This is currently |
|
1632 |
* safe, because domount() uses pn_get(), and other callers |
|
1633 |
* similarly cap the size to fewer than MAXPATHLEN bytes. |
|
1634 |
*/ |
|
1635 |
||
1636 |
ASSERT(strlen(newpath) < MAXPATHLEN); |
|
1637 |
||
1638 |
/* mntfs requires consistency while vfs list lock is held */ |
|
1639 |
||
1640 |
if (VFS_ON_LIST(vfsp)) { |
|
1641 |
have_list_lock = 1; |
|
1642 |
vfs_list_lock(); |
|
1643 |
} |
|
1644 |
||
1645 |
if (*refp != NULL) |
|
1646 |
refstr_rele(*refp); |
|
1647 |
||
1648 |
/* Do we need to modify the path? */ |
|
1649 |
||
1650 |
if (zone == global_zone || *newpath != '/') { |
|
1651 |
ref = refstr_alloc(newpath); |
|
1652 |
goto out; |
|
1653 |
} |
|
1654 |
||
1655 |
/* |
|
1656 |
* Truncate the trailing '/' in the zoneroot, and merge |
|
1657 |
* in the zone's rootpath with the "newpath" (resource |
|
1658 |
* or mountpoint) passed in. |
|
1659 |
* |
|
1660 |
* The size of the required buffer is thus the size of |
|
1661 |
* the buffer required for the passed-in newpath |
|
1662 |
* (strlen(newpath) + 1), plus the size of the buffer |
|
1663 |
* required to hold zone_rootpath (zone_rootpathlen) |
|
1664 |
* minus one for one of the now-superfluous NUL |
|
1665 |
* terminations, minus one for the trailing '/'. |
|
1666 |
* |
|
1667 |
* That gives us: |
|
1668 |
* |
|
1669 |
* (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1 |
|
1670 |
* |
|
1671 |
* Which is what we have below. |
|
1672 |
*/ |
|
1673 |
||
1674 |
len = strlen(newpath) + zone->zone_rootpathlen - 1; |
|
1675 |
sp = kmem_alloc(len, KM_SLEEP); |
|
1676 |
||
1677 |
/* |
|
1678 |
* Copy everything including the trailing slash, which |
|
1679 |
* we then overwrite with the NUL character. |
|
1680 |
*/ |
|
1681 |
||
1682 |
(void) strcpy(sp, zone->zone_rootpath); |
|
1683 |
sp[zone->zone_rootpathlen - 2] = '\0'; |
|
1684 |
(void) strcat(sp, newpath); |
|
1685 |
||
1686 |
ref = refstr_alloc(sp); |
|
1687 |
kmem_free(sp, len); |
|
1688 |
out: |
|
1689 |
*refp = ref; |
|
1690 |
||
1691 |
if (have_list_lock) { |
|
1692 |
vfs_mnttab_modtimeupd(); |
|
1693 |
vfs_list_unlock(); |
|
1694 |
} |
|
1695 |
} |
|
1696 |
||
1697 |
/* |
|
1698 |
* Record a mounted resource name in a vfs structure. |
|
1699 |
* If vfsp is already mounted, caller must hold the vfs lock. |
|
1700 |
*/ |
|
1701 |
void |
|
1702 |
vfs_setresource(struct vfs *vfsp, const char *resource) |
|
1703 |
{ |
|
1704 |
if (resource == NULL || resource[0] == '\0') |
|
1705 |
resource = VFS_NORESOURCE; |
|
1706 |
vfs_setpath(vfsp, &vfsp->vfs_resource, resource); |
|
1707 |
} |
|
1708 |
||
1709 |
/* |
|
1710 |
* Record a mount point name in a vfs structure. |
|
1711 |
* If vfsp is already mounted, caller must hold the vfs lock. |
|
1712 |
*/ |
|
1713 |
void |
|
1714 |
vfs_setmntpoint(struct vfs *vfsp, const char *mntpt) |
|
1715 |
{ |
|
1716 |
if (mntpt == NULL || mntpt[0] == '\0') |
|
1717 |
mntpt = VFS_NOMNTPT; |
|
1718 |
vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt); |
|
1719 |
} |
|
1720 |
||
1721 |
/* Returns the vfs_resource. Caller must call refstr_rele() when finished. */ |
|
1722 |
||
1723 |
refstr_t * |
|
1724 |
vfs_getresource(const struct vfs *vfsp) |
|
1725 |
{ |
|
1726 |
refstr_t *resource; |
|
1727 |
||
1728 |
vfs_list_read_lock(); |
|
1729 |
resource = vfsp->vfs_resource; |
|
1730 |
refstr_hold(resource); |
|
1731 |
vfs_list_unlock(); |
|
1732 |
||
1733 |
return (resource); |
|
1734 |
} |
|
1735 |
||
1736 |
/* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. */ |
|
1737 |
||
1738 |
refstr_t * |
|
1739 |
vfs_getmntpoint(const struct vfs *vfsp) |
|
1740 |
{ |
|
1741 |
refstr_t *mntpt; |
|
1742 |
||
1743 |
vfs_list_read_lock(); |
|
1744 |
mntpt = vfsp->vfs_mntpt; |
|
1745 |
refstr_hold(mntpt); |
|
1746 |
vfs_list_unlock(); |
|
1747 |
||
1748 |
return (mntpt); |
|
1749 |
} |
|
1750 |
||
1751 |
/* |
|
1752 |
* Create an empty options table with enough empty slots to hold all |
|
1753 |
* The options in the options string passed as an argument. |
|
1754 |
* Potentially prepend another options table. |
|
1755 |
* |
|
1756 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
1757 |
* to protect mops. |
|
1758 |
*/ |
|
1759 |
static void |
|
1760 |
vfs_createopttbl_extend(mntopts_t *mops, const char *opts, |
|
1761 |
const mntopts_t *mtmpl) |
|
1762 |
{ |
|
1763 |
const char *s = opts; |
|
1764 |
uint_t count; |
|
1765 |
||
1766 |
if (opts == NULL || *opts == '\0') { |
|
1767 |
count = 0; |
|
1768 |
} else { |
|
1769 |
count = 1; |
|
1770 |
||
1771 |
/* |
|
1772 |
* Count number of options in the string |
|
1773 |
*/ |
|
1774 |
for (s = strchr(s, ','); s != NULL; s = strchr(s, ',')) { |
|
1775 |
count++; |
|
1776 |
s++; |
|
1777 |
} |
|
1778 |
} |
|
1779 |
vfs_copyopttbl_extend(mtmpl, mops, count); |
|
1780 |
} |
|
1781 |
||
1782 |
/* |
|
1783 |
* Create an empty options table with enough empty slots to hold all |
|
1784 |
* The options in the options string passed as an argument. |
|
1785 |
* |
|
1786 |
* This function is *not* for general use by filesystems. |
|
1787 |
* |
|
1788 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
1789 |
* to protect mops. |
|
1790 |
*/ |
|
1791 |
void |
|
1792 |
vfs_createopttbl(mntopts_t *mops, const char *opts) |
|
1793 |
{ |
|
1794 |
vfs_createopttbl_extend(mops, opts, NULL); |
|
1795 |
} |
|
1796 |
||
1797 |
||
1798 |
/* |
|
1799 |
* Swap two mount options tables |
|
1800 |
*/ |
|
1801 |
static void |
|
1802 |
vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2) |
|
1803 |
{ |
|
1804 |
uint_t tmpcnt; |
|
1805 |
mntopt_t *tmplist; |
|
1806 |
||
1807 |
tmpcnt = optbl2->mo_count; |
|
1808 |
tmplist = optbl2->mo_list; |
|
1809 |
optbl2->mo_count = optbl1->mo_count; |
|
1810 |
optbl2->mo_list = optbl1->mo_list; |
|
1811 |
optbl1->mo_count = tmpcnt; |
|
1812 |
optbl1->mo_list = tmplist; |
|
1813 |
} |
|
1814 |
||
1815 |
static void |
|
1816 |
vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2) |
|
1817 |
{ |
|
1818 |
vfs_list_lock(); |
|
1819 |
vfs_swapopttbl_nolock(optbl1, optbl2); |
|
1820 |
vfs_mnttab_modtimeupd(); |
|
1821 |
vfs_list_unlock(); |
|
1822 |
} |
|
1823 |
||
1824 |
static char ** |
|
1825 |
vfs_copycancelopt_extend(char **const moc, int extend) |
|
1826 |
{ |
|
1827 |
int i = 0; |
|
1828 |
int j; |
|
1829 |
char **result; |
|
1830 |
||
1831 |
if (moc != NULL) { |
|
1832 |
for (; moc[i] != NULL; i++) |
|
1833 |
/* count number of options to cancel */; |
|
1834 |
} |
|
1835 |
||
1836 |
if (i + extend == 0) |
|
1837 |
return (NULL); |
|
1838 |
||
1839 |
result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP); |
|
1840 |
||
1841 |
for (j = 0; j < i; j++) { |
|
1842 |
result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP); |
|
1843 |
(void) strcpy(result[j], moc[j]); |
|
1844 |
} |
|
1845 |
for (; j <= i + extend; j++) |
|
1846 |
result[j] = NULL; |
|
1847 |
||
1848 |
return (result); |
|
1849 |
} |
|
1850 |
||
1851 |
static void |
|
1852 |
vfs_copyopt(const mntopt_t *s, mntopt_t *d) |
|
1853 |
{ |
|
1854 |
char *sp, *dp; |
|
1855 |
||
1856 |
d->mo_flags = s->mo_flags; |
|
1857 |
d->mo_data = s->mo_data; |
|
1858 |
sp = s->mo_name; |
|
1859 |
if (sp != NULL) { |
|
1860 |
dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); |
|
1861 |
(void) strcpy(dp, sp); |
|
1862 |
d->mo_name = dp; |
|
1863 |
} else { |
|
1864 |
d->mo_name = NULL; /* should never happen */ |
|
1865 |
} |
|
1866 |
||
1867 |
d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0); |
|
1868 |
||
1869 |
sp = s->mo_arg; |
|
1870 |
if (sp != NULL) { |
|
1871 |
dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); |
|
1872 |
(void) strcpy(dp, sp); |
|
1873 |
d->mo_arg = dp; |
|
1874 |
} else { |
|
1875 |
d->mo_arg = NULL; |
|
1876 |
} |
|
1877 |
} |
|
1878 |
||
1879 |
/* |
|
1880 |
* Copy a mount options table, possibly allocating some spare |
|
1881 |
* slots at the end. It is permissible to copy_extend the NULL table. |
|
1882 |
*/ |
|
1883 |
static void |
|
1884 |
vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra) |
|
1885 |
{ |
|
1886 |
uint_t i, count; |
|
1887 |
mntopt_t *motbl; |
|
1888 |
||
1889 |
/* |
|
1890 |
* Clear out any existing stuff in the options table being initialized |
|
1891 |
*/ |
|
1892 |
vfs_freeopttbl(dmo); |
|
1893 |
count = (smo == NULL) ? 0 : smo->mo_count; |
|
1894 |
if ((count + extra) == 0) /* nothing to do */ |
|
1895 |
return; |
|
1896 |
dmo->mo_count = count + extra; |
|
1897 |
motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP); |
|
1898 |
dmo->mo_list = motbl; |
|
1899 |
for (i = 0; i < count; i++) { |
|
1900 |
vfs_copyopt(&smo->mo_list[i], &motbl[i]); |
|
1901 |
} |
|
1902 |
for (i = count; i < count + extra; i++) { |
|
1903 |
motbl[i].mo_flags = MO_EMPTY; |
|
1904 |
} |
|
1905 |
} |
|
1906 |
||
1907 |
/* |
|
1908 |
* Copy a mount options table. |
|
1909 |
* |
|
1910 |
* This function is *not* for general use by filesystems. |
|
1911 |
* |
|
1912 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
1913 |
* to protect smo and dmo. |
|
1914 |
*/ |
|
1915 |
void |
|
1916 |
vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo) |
|
1917 |
{ |
|
1918 |
vfs_copyopttbl_extend(smo, dmo, 0); |
|
1919 |
} |
|
1920 |
||
1921 |
static char ** |
|
1922 |
vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2) |
|
1923 |
{ |
|
1924 |
int c1 = 0; |
|
1925 |
int c2 = 0; |
|
1926 |
char **result; |
|
1927 |
char **sp1, **sp2, **dp; |
|
1928 |
||
1929 |
/* |
|
1930 |
* First we count both lists of cancel options. |
|
1931 |
* If either is NULL or has no elements, we return a copy of |
|
1932 |
* the other. |
|
1933 |
*/ |
|
1934 |
if (mop1->mo_cancel != NULL) { |
|
1935 |
for (; mop1->mo_cancel[c1] != NULL; c1++) |
|
1936 |
/* count cancel options in mop1 */; |
|
1937 |
} |
|
1938 |
||
1939 |
if (c1 == 0) |
|
1940 |
return (vfs_copycancelopt_extend(mop2->mo_cancel, 0)); |
|
1941 |
||
1942 |
if (mop2->mo_cancel != NULL) { |
|
1943 |
for (; mop2->mo_cancel[c2] != NULL; c2++) |
|
1944 |
/* count cancel options in mop2 */; |
|
1945 |
} |
|
1946 |
||
1947 |
result = vfs_copycancelopt_extend(mop1->mo_cancel, c2); |
|
1948 |
||
1949 |
if (c2 == 0) |
|
1950 |
return (result); |
|
1951 |
||
1952 |
/* |
|
1953 |
* When we get here, we've got two sets of cancel options; |
|
1954 |
* we need to merge the two sets. We know that the result |
|
1955 |
* array has "c1+c2+1" entries and in the end we might shrink |
|
1956 |
* it. |
|
1957 |
* Result now has a copy of the c1 entries from mop1; we'll |
|
1958 |
* now lookup all the entries of mop2 in mop1 and copy it if |
|
1959 |
* it is unique. |
|
1960 |
* This operation is O(n^2) but it's only called once per |
|
1961 |
* filesystem per duplicate option. This is a situation |
|
1962 |
* which doesn't arise with the filesystems in ON and |
|
1963 |
* n is generally 1. |
|
1964 |
*/ |
|
1965 |
||
1966 |
dp = &result[c1]; |
|
1967 |
for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) { |
|
1968 |
for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) { |
|
1969 |
if (strcmp(*sp1, *sp2) == 0) |
|
1970 |
break; |
|
1971 |
} |
|
1972 |
if (*sp1 == NULL) { |
|
1973 |
/* |
|
1974 |
* Option *sp2 not found in mop1, so copy it. |
|
1975 |
* The calls to vfs_copycancelopt_extend() |
|
1976 |
* guarantee that there's enough room. |
|
1977 |
*/ |
|
1978 |
*dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP); |
|
1979 |
(void) strcpy(*dp++, *sp2); |
|
1980 |
} |
|
1981 |
} |
|
1982 |
if (dp != &result[c1+c2]) { |
|
1983 |
size_t bytes = (dp - result + 1) * sizeof (char *); |
|
1984 |
char **nres = kmem_alloc(bytes, KM_SLEEP); |
|
1985 |
||
1986 |
bcopy(result, nres, bytes); |
|
1987 |
kmem_free(result, (c1 + c2 + 1) * sizeof (char *)); |
|
1988 |
result = nres; |
|
1989 |
} |
|
1990 |
return (result); |
|
1991 |
} |
|
1992 |
||
1993 |
/* |
|
1994 |
* Merge two mount option tables (outer and inner) into one. This is very |
|
1995 |
* similar to "merging" global variables and automatic variables in C. |
|
1996 |
* |
|
1997 |
* This isn't (and doesn't have to be) fast. |
|
1998 |
* |
|
1999 |
* This function is *not* for general use by filesystems. |
|
2000 |
* |
|
2001 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2002 |
* to protect omo, imo & dmo. |
|
2003 |
*/ |
|
2004 |
void |
|
2005 |
vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo) |
|
2006 |
{ |
|
2007 |
uint_t i, count; |
|
2008 |
mntopt_t *mop, *motbl; |
|
2009 |
uint_t freeidx; |
|
2010 |
||
2011 |
/* |
|
2012 |
* First determine how much space we need to allocate. |
|
2013 |
*/ |
|
2014 |
count = omo->mo_count; |
|
2015 |
for (i = 0; i < imo->mo_count; i++) { |
|
2016 |
if (imo->mo_list[i].mo_flags & MO_EMPTY) |
|
2017 |
continue; |
|
2018 |
if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL) |
|
2019 |
count++; |
|
2020 |
} |
|
2021 |
ASSERT(count >= omo->mo_count && |
|
2022 |
count <= omo->mo_count + imo->mo_count); |
|
2023 |
motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP); |
|
2024 |
for (i = 0; i < omo->mo_count; i++) |
|
2025 |
vfs_copyopt(&omo->mo_list[i], &motbl[i]); |
|
2026 |
freeidx = omo->mo_count; |
|
2027 |
for (i = 0; i < imo->mo_count; i++) { |
|
2028 |
if (imo->mo_list[i].mo_flags & MO_EMPTY) |
|
2029 |
continue; |
|
2030 |
if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) { |
|
2031 |
char **newcanp; |
|
2032 |
uint_t index = mop - omo->mo_list; |
|
2033 |
||
2034 |
newcanp = vfs_mergecancelopts(mop, &motbl[index]); |
|
2035 |
||
2036 |
vfs_freeopt(&motbl[index]); |
|
2037 |
vfs_copyopt(&imo->mo_list[i], &motbl[index]); |
|
2038 |
||
2039 |
vfs_freecancelopt(motbl[index].mo_cancel); |
|
2040 |
motbl[index].mo_cancel = newcanp; |
|
2041 |
} else { |
|
2042 |
/* |
|
2043 |
* If it's a new option, just copy it over to the first |
|
2044 |
* free location. |
|
2045 |
*/ |
|
2046 |
vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]); |
|
2047 |
} |
|
2048 |
} |
|
2049 |
dmo->mo_count = count; |
|
2050 |
dmo->mo_list = motbl; |
|
2051 |
} |
|
2052 |
||
2053 |
/* |
|
2054 |
* Functions to set and clear mount options in a mount options table. |
|
2055 |
*/ |
|
2056 |
||
2057 |
/* |
|
2058 |
* Clear a mount option, if it exists. |
|
2059 |
* |
|
2060 |
* The update_mnttab arg indicates whether mops is part of a vfs that is on |
|
2061 |
* the vfs list. |
|
2062 |
*/ |
|
2063 |
static void |
|
2064 |
vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab) |
|
2065 |
{ |
|
2066 |
struct mntopt *mop; |
|
2067 |
uint_t i, count; |
|
2068 |
||
2069 |
ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); |
|
2070 |
||
2071 |
count = mops->mo_count; |
|
2072 |
for (i = 0; i < count; i++) { |
|
2073 |
mop = &mops->mo_list[i]; |
|
2074 |
||
2075 |
if (mop->mo_flags & MO_EMPTY) |
|
2076 |
continue; |
|
2077 |
if (strcmp(opt, mop->mo_name)) |
|
2078 |
continue; |
|
2079 |
mop->mo_flags &= ~MO_SET; |
|
2080 |
if (mop->mo_arg != NULL) { |
|
2081 |
kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); |
|
2082 |
} |
|
2083 |
mop->mo_arg = NULL; |
|
2084 |
if (update_mnttab) |
|
2085 |
vfs_mnttab_modtimeupd(); |
|
2086 |
break; |
|
2087 |
} |
|
2088 |
} |
|
2089 |
||
2090 |
void |
|
2091 |
vfs_clearmntopt(struct vfs *vfsp, const char *opt) |
|
2092 |
{ |
|
2093 |
int gotlock = 0; |
|
2094 |
||
2095 |
if (VFS_ON_LIST(vfsp)) { |
|
2096 |
gotlock = 1; |
|
2097 |
vfs_list_lock(); |
|
2098 |
} |
|
2099 |
vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock); |
|
2100 |
if (gotlock) |
|
2101 |
vfs_list_unlock(); |
|
2102 |
} |
|
2103 |
||
2104 |
||
2105 |
/* |
|
2106 |
* Set a mount option on. If it's not found in the table, it's silently |
|
2107 |
* ignored. If the option has MO_IGNORE set, it is still set unless the |
|
2108 |
* VFS_NOFORCEOPT bit is set in the flags. Also, VFS_DISPLAY/VFS_NODISPLAY flag |
|
2109 |
* bits can be used to toggle the MO_NODISPLAY bit for the option. |
|
2110 |
* If the VFS_CREATEOPT flag bit is set then the first option slot with |
|
2111 |
* MO_EMPTY set is created as the option passed in. |
|
2112 |
* |
|
2113 |
* The update_mnttab arg indicates whether mops is part of a vfs that is on |
|
2114 |
* the vfs list. |
|
2115 |
*/ |
|
2116 |
static void |
|
2117 |
vfs_setmntopt_nolock(mntopts_t *mops, const char *opt, |
|
2118 |
const char *arg, int flags, int update_mnttab) |
|
2119 |
{ |
|
2120 |
mntopt_t *mop; |
|
2121 |
uint_t i, count; |
|
2122 |
char *sp; |
|
2123 |
||
2124 |
ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); |
|
2125 |
||
2126 |
if (flags & VFS_CREATEOPT) { |
|
2127 |
if (vfs_hasopt(mops, opt) != NULL) { |
|
2128 |
flags &= ~VFS_CREATEOPT; |
|
2129 |
} |
|
2130 |
} |
|
2131 |
count = mops->mo_count; |
|
2132 |
for (i = 0; i < count; i++) { |
|
2133 |
mop = &mops->mo_list[i]; |
|
2134 |
||
2135 |
if (mop->mo_flags & MO_EMPTY) { |
|
2136 |
if ((flags & VFS_CREATEOPT) == 0) |
|
2137 |
continue; |
|
2138 |
sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP); |
|
2139 |
(void) strcpy(sp, opt); |
|
2140 |
mop->mo_name = sp; |
|
2141 |
if (arg != NULL) |
|
2142 |
mop->mo_flags = MO_HASVALUE; |
|
2143 |
else |
|
2144 |
mop->mo_flags = 0; |
|
2145 |
} else if (strcmp(opt, mop->mo_name)) { |
|
2146 |
continue; |
|
2147 |
} |
|
2148 |
if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT)) |
|
2149 |
break; |
|
2150 |
if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) { |
|
2151 |
sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP); |
|
2152 |
(void) strcpy(sp, arg); |
|
2153 |
} else { |
|
2154 |
sp = NULL; |
|
2155 |
} |
|
2156 |
if (mop->mo_arg != NULL) |
|
2157 |
kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); |
|
2158 |
mop->mo_arg = sp; |
|
2159 |
if (flags & VFS_DISPLAY) |
|
2160 |
mop->mo_flags &= ~MO_NODISPLAY; |
|
2161 |
if (flags & VFS_NODISPLAY) |
|
2162 |
mop->mo_flags |= MO_NODISPLAY; |
|
2163 |
mop->mo_flags |= MO_SET; |
|
2164 |
if (mop->mo_cancel != NULL) { |
|
2165 |
char **cp; |
|
2166 |
||
2167 |
for (cp = mop->mo_cancel; *cp != NULL; cp++) |
|
2168 |
vfs_clearmntopt_nolock(mops, *cp, 0); |
|
2169 |
} |
|
2170 |
if (update_mnttab) |
|
2171 |
vfs_mnttab_modtimeupd(); |
|
2172 |
break; |
|
2173 |
} |
|
2174 |
} |
|
2175 |
||
2176 |
void |
|
2177 |
vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags) |
|
2178 |
{ |
|
2179 |
int gotlock = 0; |
|
2180 |
||
2181 |
if (VFS_ON_LIST(vfsp)) { |
|
2182 |
gotlock = 1; |
|
2183 |
vfs_list_lock(); |
|
2184 |
} |
|
2185 |
vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock); |
|
2186 |
if (gotlock) |
|
2187 |
vfs_list_unlock(); |
|
2188 |
} |
|
2189 |
||
2190 |
||
2191 |
/* |
|
2192 |
* Add a "tag" option to a mounted file system's options list. |
|
2193 |
* |
|
2194 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2195 |
* to protect mops. |
|
2196 |
*/ |
|
2197 |
static mntopt_t * |
|
2198 |
vfs_addtag(mntopts_t *mops, const char *tag) |
|
2199 |
{ |
|
2200 |
uint_t count; |
|
2201 |
mntopt_t *mop, *motbl; |
|
2202 |
||
2203 |
count = mops->mo_count + 1; |
|
2204 |
motbl = kmem_zalloc(count * sizeof (mntopt_t), KM_SLEEP); |
|
2205 |
if (mops->mo_count) { |
|
2206 |
size_t len = (count - 1) * sizeof (mntopt_t); |
|
2207 |
||
2208 |
bcopy(mops->mo_list, motbl, len); |
|
2209 |
kmem_free(mops->mo_list, len); |
|
2210 |
} |
|
2211 |
mops->mo_count = count; |
|
2212 |
mops->mo_list = motbl; |
|
2213 |
mop = &motbl[count - 1]; |
|
2214 |
mop->mo_flags = MO_TAG; |
|
2215 |
mop->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP); |
|
2216 |
(void) strcpy(mop->mo_name, tag); |
|
2217 |
return (mop); |
|
2218 |
} |
|
2219 |
||
2220 |
/* |
|
2221 |
* Allow users to set arbitrary "tags" in a vfs's mount options. |
|
2222 |
* Broader use within the kernel is discouraged. |
|
2223 |
*/ |
|
2224 |
int |
|
2225 |
vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag, |
|
2226 |
cred_t *cr) |
|
2227 |
{ |
|
2228 |
vfs_t *vfsp; |
|
2229 |
mntopts_t *mops; |
|
2230 |
mntopt_t *mop; |
|
2231 |
int found = 0; |
|
2232 |
dev_t dev = makedevice(major, minor); |
|
2233 |
int err = 0; |
|
2234 |
char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); |
|
2235 |
||
2236 |
/* |
|
2237 |
* Find the desired mounted file system |
|
2238 |
*/ |
|
2239 |
vfs_list_lock(); |
|
2240 |
vfsp = rootvfs; |
|
2241 |
do { |
|
2242 |
if (vfsp->vfs_dev == dev && |
|
2243 |
strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { |
|
2244 |
found = 1; |
|
2245 |
break; |
|
2246 |
} |
|
2247 |
vfsp = vfsp->vfs_next; |
|
2248 |
} while (vfsp != rootvfs); |
|
2249 |
||
2250 |
if (!found) { |
|
2251 |
err = EINVAL; |
|
2252 |
goto out; |
|
2253 |
} |
|
2254 |
err = secpolicy_fs_config(cr, vfsp); |
|
2255 |
if (err != 0) |
|
2256 |
goto out; |
|
2257 |
||
2258 |
mops = &vfsp->vfs_mntopts; |
|
2259 |
/* |
|
2260 |
* Add tag if it doesn't already exist |
|
2261 |
*/ |
|
2262 |
if ((mop = vfs_hasopt(mops, tag)) == NULL) { |
|
2263 |
int len; |
|
2264 |
||
2265 |
(void) vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR); |
|
2266 |
len = strlen(buf); |
|
2267 |
if (len + strlen(tag) + 2 > MAX_MNTOPT_STR) { |
|
2268 |
err = ENAMETOOLONG; |
|
2269 |
goto out; |
|
2270 |
} |
|
2271 |
mop = vfs_addtag(mops, tag); |
|
2272 |
} |
|
2273 |
if ((mop->mo_flags & MO_TAG) == 0) { |
|
2274 |
err = EINVAL; |
|
2275 |
goto out; |
|
2276 |
} |
|
2277 |
vfs_setmntopt_nolock(mops, tag, NULL, 0, 1); |
|
2278 |
out: |
|
2279 |
vfs_list_unlock(); |
|
2280 |
kmem_free(buf, MAX_MNTOPT_STR); |
|
2281 |
return (err); |
|
2282 |
} |
|
2283 |
||
2284 |
/* |
|
2285 |
* Allow users to remove arbitrary "tags" in a vfs's mount options. |
|
2286 |
* Broader use within the kernel is discouraged. |
|
2287 |
*/ |
|
2288 |
int |
|
2289 |
vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag, |
|
2290 |
cred_t *cr) |
|
2291 |
{ |
|
2292 |
vfs_t *vfsp; |
|
2293 |
mntopt_t *mop; |
|
2294 |
int found = 0; |
|
2295 |
dev_t dev = makedevice(major, minor); |
|
2296 |
int err = 0; |
|
2297 |
||
2298 |
/* |
|
2299 |
* Find the desired mounted file system |
|
2300 |
*/ |
|
2301 |
vfs_list_lock(); |
|
2302 |
vfsp = rootvfs; |
|
2303 |
do { |
|
2304 |
if (vfsp->vfs_dev == dev && |
|
2305 |
strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { |
|
2306 |
found = 1; |
|
2307 |
break; |
|
2308 |
} |
|
2309 |
vfsp = vfsp->vfs_next; |
|
2310 |
} while (vfsp != rootvfs); |
|
2311 |
||
2312 |
if (!found) { |
|
2313 |
err = EINVAL; |
|
2314 |
goto out; |
|
2315 |
} |
|
2316 |
err = secpolicy_fs_config(cr, vfsp); |
|
2317 |
if (err != 0) |
|
2318 |
goto out; |
|
2319 |
||
2320 |
if ((mop = vfs_hasopt(&vfsp->vfs_mntopts, tag)) == NULL) { |
|
2321 |
err = EINVAL; |
|
2322 |
goto out; |
|
2323 |
} |
|
2324 |
if ((mop->mo_flags & MO_TAG) == 0) { |
|
2325 |
err = EINVAL; |
|
2326 |
goto out; |
|
2327 |
} |
|
2328 |
vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, tag, 1); |
|
2329 |
out: |
|
2330 |
vfs_list_unlock(); |
|
2331 |
return (err); |
|
2332 |
} |
|
2333 |
||
2334 |
/* |
|
2335 |
* Function to parse an option string and fill in a mount options table. |
|
2336 |
* Unknown options are silently ignored. The input option string is modified |
|
2337 |
* by replacing separators with nulls. If the create flag is set, options |
|
2338 |
* not found in the table are just added on the fly. The table must have |
|
2339 |
* an option slot marked MO_EMPTY to add an option on the fly. |
|
2340 |
* |
|
2341 |
* This function is *not* for general use by filesystems. |
|
2342 |
* |
|
2343 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2344 |
* to protect mops.. |
|
2345 |
*/ |
|
2346 |
void |
|
2347 |
vfs_parsemntopts(mntopts_t *mops, char *osp, int create) |
|
2348 |
{ |
|
2349 |
char *s = osp, *p, *nextop, *valp, *cp, *ep; |
|
2350 |
int setflg = VFS_NOFORCEOPT; |
|
2351 |
||
2352 |
if (osp == NULL) |
|
2353 |
return; |
|
2354 |
while (*s != '\0') { |
|
2355 |
p = strchr(s, ','); /* find next option */ |
|
2356 |
if (p == NULL) { |
|
2357 |
cp = NULL; |
|
2358 |
p = s + strlen(s); |
|
2359 |
} else { |
|
2360 |
cp = p; /* save location of comma */ |
|
2361 |
*p++ = '\0'; /* mark end and point to next option */ |
|
2362 |
} |
|
2363 |
nextop = p; |
|
2364 |
p = strchr(s, '='); /* look for value */ |
|
2365 |
if (p == NULL) { |
|
2366 |
valp = NULL; /* no value supplied */ |
|
2367 |
} else { |
|
2368 |
ep = p; /* save location of equals */ |
|
2369 |
*p++ = '\0'; /* end option and point to value */ |
|
2370 |
valp = p; |
|
2371 |
} |
|
2372 |
/* |
|
2373 |
* set option into options table |
|
2374 |
*/ |
|
2375 |
if (create) |
|
2376 |
setflg |= VFS_CREATEOPT; |
|
2377 |
vfs_setmntopt_nolock(mops, s, valp, setflg, 0); |
|
2378 |
if (cp != NULL) |
|
2379 |
*cp = ','; /* restore the comma */ |
|
2380 |
if (valp != NULL) |
|
2381 |
*ep = '='; /* restore the equals */ |
|
2382 |
s = nextop; |
|
2383 |
} |
|
2384 |
} |
|
2385 |
||
2386 |
/* |
|
2387 |
* Function to inquire if an option exists in a mount options table. |
|
2388 |
* Returns a pointer to the option if it exists, else NULL. |
|
2389 |
* |
|
2390 |
* This function is *not* for general use by filesystems. |
|
2391 |
* |
|
2392 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2393 |
* to protect mops. |
|
2394 |
*/ |
|
2395 |
struct mntopt * |
|
2396 |
vfs_hasopt(const mntopts_t *mops, const char *opt) |
|
2397 |
{ |
|
2398 |
struct mntopt *mop; |
|
2399 |
uint_t i, count; |
|
2400 |
||
2401 |
count = mops->mo_count; |
|
2402 |
for (i = 0; i < count; i++) { |
|
2403 |
mop = &mops->mo_list[i]; |
|
2404 |
||
2405 |
if (mop->mo_flags & MO_EMPTY) |
|
2406 |
continue; |
|
2407 |
if (strcmp(opt, mop->mo_name) == 0) |
|
2408 |
return (mop); |
|
2409 |
} |
|
2410 |
return (NULL); |
|
2411 |
} |
|
2412 |
||
2413 |
/* |
|
2414 |
* Function to inquire if an option is set in a mount options table. |
|
2415 |
* Returns non-zero if set and fills in the arg pointer with a pointer to |
|
2416 |
* the argument string or NULL if there is no argument string. |
|
2417 |
*/ |
|
2418 |
static int |
|
2419 |
vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp) |
|
2420 |
{ |
|
2421 |
struct mntopt *mop; |
|
2422 |
uint_t i, count; |
|
2423 |
||
2424 |
count = mops->mo_count; |
|
2425 |
for (i = 0; i < count; i++) { |
|
2426 |
mop = &mops->mo_list[i]; |
|
2427 |
||
2428 |
if (mop->mo_flags & MO_EMPTY) |
|
2429 |
continue; |
|
2430 |
if (strcmp(opt, mop->mo_name)) |
|
2431 |
continue; |
|
2432 |
if ((mop->mo_flags & MO_SET) == 0) |
|
2433 |
return (0); |
|
2434 |
if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0) |
|
2435 |
*argp = mop->mo_arg; |
|
2436 |
return (1); |
|
2437 |
} |
|
2438 |
return (0); |
|
2439 |
} |
|
2440 |
||
2441 |
||
2442 |
int |
|
2443 |
vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp) |
|
2444 |
{ |
|
2445 |
int ret; |
|
2446 |
||
2447 |
vfs_list_read_lock(); |
|
2448 |
ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp); |
|
2449 |
vfs_list_unlock(); |
|
2450 |
return (ret); |
|
2451 |
} |
|
2452 |
||
2453 |
||
2454 |
/* |
|
2455 |
* Construct a comma separated string of the options set in the given |
|
2456 |
* mount table, return the string in the given buffer. Return non-zero if |
|
2457 |
* the buffer would overflow. |
|
2458 |
* |
|
2459 |
* This function is *not* for general use by filesystems. |
|
2460 |
* |
|
2461 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2462 |
* to protect mp. |
|
2463 |
*/ |
|
2464 |
int |
|
2465 |
vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len) |
|
2466 |
{ |
|
2467 |
char *cp; |
|
2468 |
uint_t i; |
|
2469 |
||
2470 |
buf[0] = '\0'; |
|
2471 |
cp = buf; |
|
2472 |
for (i = 0; i < mp->mo_count; i++) { |
|
2473 |
struct mntopt *mop; |
|
2474 |
||
2475 |
mop = &mp->mo_list[i]; |
|
2476 |
if (mop->mo_flags & MO_SET) { |
|
2477 |
int optlen, comma = 0; |
|
2478 |
||
2479 |
if (buf[0] != '\0') |
|
2480 |
comma = 1; |
|
2481 |
optlen = strlen(mop->mo_name); |
|
2482 |
if (strlen(buf) + comma + optlen + 1 > len) |
|
2483 |
goto err; |
|
2484 |
if (comma) |
|
2485 |
*cp++ = ','; |
|
2486 |
(void) strcpy(cp, mop->mo_name); |
|
2487 |
cp += optlen; |
|
2488 |
/* |
|
2489 |
* Append option value if there is one |
|
2490 |
*/ |
|
2491 |
if (mop->mo_arg != NULL) { |
|
2492 |
int arglen; |
|
2493 |
||
2494 |
arglen = strlen(mop->mo_arg); |
|
2495 |
if (strlen(buf) + arglen + 2 > len) |
|
2496 |
goto err; |
|
2497 |
*cp++ = '='; |
|
2498 |
(void) strcpy(cp, mop->mo_arg); |
|
2499 |
cp += arglen; |
|
2500 |
} |
|
2501 |
} |
|
2502 |
} |
|
2503 |
return (0); |
|
2504 |
err: |
|
2505 |
return (EOVERFLOW); |
|
2506 |
} |
|
2507 |
||
2508 |
/*
 * Free a NULL-terminated array of cancel option strings: first every
 * string, then the array itself.  A NULL array is ignored.
 */
static void
vfs_freecancelopt(char **moc)
{
	int n;

	if (moc == NULL)
		return;

	for (n = 0; moc[n] != NULL; n++)
		kmem_free(moc[n], strlen(moc[n]) + 1);

	/* The array has n entries plus its NULL terminator. */
	kmem_free(moc, (n + 1) * sizeof (char *));
}
|
2522 |
||
2523 |
static void |
|
2524 |
vfs_freeopt(mntopt_t *mop) |
|
2525 |
{ |
|
2526 |
if (mop->mo_name != NULL) |
|
2527 |
kmem_free(mop->mo_name, strlen(mop->mo_name) + 1); |
|
2528 |
||
2529 |
vfs_freecancelopt(mop->mo_cancel); |
|
2530 |
||
2531 |
if (mop->mo_arg != NULL) |
|
2532 |
kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); |
|
2533 |
} |
|
2534 |
||
2535 |
/* |
|
2536 |
* Free a mount options table |
|
2537 |
* |
|
2538 |
* This function is *not* for general use by filesystems. |
|
2539 |
* |
|
2540 |
* Note: caller is responsible for locking the vfs list, if needed, |
|
2541 |
* to protect mp. |
|
2542 |
*/ |
|
2543 |
void |
|
2544 |
vfs_freeopttbl(mntopts_t *mp) |
|
2545 |
{ |
|
2546 |
uint_t i, count; |
|
2547 |
||
2548 |
count = mp->mo_count; |
|
2549 |
for (i = 0; i < count; i++) { |
|
2550 |
vfs_freeopt(&mp->mo_list[i]); |
|
2551 |
} |
|
2552 |
if (count) { |
|
2553 |
kmem_free(mp->mo_list, sizeof (mntopt_t) * count); |
|
2554 |
mp->mo_count = 0; |
|
2555 |
mp->mo_list = NULL; |
|
2556 |
} |
|
2557 |
} |
|
2558 |
||
2559 |
/* |
|
2560 |
* Free any mnttab information recorded in the vfs struct. |
|
2561 |
* The vfs must not be on the vfs list. |
|
2562 |
*/ |
|
2563 |
static void |
|
2564 |
vfs_freemnttab(struct vfs *vfsp) |
|
2565 |
{ |
|
2566 |
ASSERT(!VFS_ON_LIST(vfsp)); |
|
2567 |
||
2568 |
/* |
|
2569 |
* Free device and mount point information |
|
2570 |
*/ |
|
2571 |
if (vfsp->vfs_mntpt != NULL) { |
|
2572 |
refstr_rele(vfsp->vfs_mntpt); |
|
2573 |
vfsp->vfs_mntpt = NULL; |
|
2574 |
} |
|
2575 |
if (vfsp->vfs_resource != NULL) { |
|
2576 |
refstr_rele(vfsp->vfs_resource); |
|
2577 |
vfsp->vfs_resource = NULL; |
|
2578 |
} |
|
2579 |
/* |
|
2580 |
* Now free mount options information |
|
2581 |
*/ |
|
2582 |
vfs_freeopttbl(&vfsp->vfs_mntopts); |
|
2583 |
} |
|
2584 |
||
2585 |
/* |
|
2586 |
* Return the last mnttab modification time |
|
2587 |
*/ |
|
2588 |
void |
|
2589 |
vfs_mnttab_modtime(timespec_t *ts) |
|
2590 |
{ |
|
2591 |
ASSERT(RW_LOCK_HELD(&vfslist)); |
|
2592 |
*ts = vfs_mnttab_mtime; |
|
2593 |
} |
|
2594 |
||
2595 |
/* |
|
2596 |
* See if mnttab is changed |
|
2597 |
*/ |
|
2598 |
void |
|
2599 |
vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp) |
|
2600 |
{ |
|
2601 |
int changed; |
|
2602 |
||
2603 |
*phpp = (struct pollhead *)NULL; |
|
2604 |
||
2605 |
/* |
|
2606 |
* Note: don't grab vfs list lock before accessing vfs_mnttab_mtime. |
|
2607 |
* Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe |
|
2608 |
* to not grab the vfs list lock because tv_sec is monotonically |
|
2609 |
* increasing. |
|
2610 |
*/ |
|
2611 |
||
2612 |
changed = (old->tv_nsec != vfs_mnttab_mtime.tv_nsec) || |
|
2613 |
(old->tv_sec != vfs_mnttab_mtime.tv_sec); |
|
2614 |
if (!changed) { |
|
2615 |
*phpp = &vfs_pollhd; |
|
2616 |
} |
|
2617 |
} |
|
2618 |
||
2619 |
/* |
|
2620 |
* Update the mnttab modification time and wake up any waiters for |
|
2621 |
* mnttab changes |
|
2622 |
*/ |
|
2623 |
void |
|
2624 |
vfs_mnttab_modtimeupd() |
|
2625 |
{ |
|
2626 |
hrtime_t oldhrt, newhrt; |
|
2627 |
||
2628 |
ASSERT(RW_WRITE_HELD(&vfslist)); |
|
2629 |
oldhrt = ts2hrt(&vfs_mnttab_mtime); |
|
2630 |
gethrestime(&vfs_mnttab_mtime); |
|
2631 |
newhrt = ts2hrt(&vfs_mnttab_mtime); |
|
2632 |
if (oldhrt == (hrtime_t)0) |
|
2633 |
vfs_mnttab_ctime = vfs_mnttab_mtime; |
|
2634 |
/* |
|
2635 |
* Attempt to provide unique mtime (like uniqtime but not). |
|
2636 |
*/ |
|
2637 |
if (newhrt == oldhrt) { |
|
2638 |
newhrt++; |
|
2639 |
hrt2ts(newhrt, &vfs_mnttab_mtime); |
|
2640 |
} |
|
2641 |
pollwakeup(&vfs_pollhd, (short)POLLRDBAND); |
|
2642 |
} |
|
2643 |
||
2644 |
int |
|
2645 |
dounmount(struct vfs *vfsp, int flag, cred_t *cr) |
|
2646 |
{ |
|
2647 |
vnode_t *coveredvp; |
|
2648 |
int error; |
|
1488 | 2649 |
extern void teardown_vopstats(vfs_t *); |
0 | 2650 |
|
2651 |
/* |
|
2652 |
* Get covered vnode. This will be NULL if the vfs is not linked |
|
2653 |
* into the file system name space (i.e., domount() with MNT_NOSPICE). |
|
2654 |
*/ |
|
2655 |
coveredvp = vfsp->vfs_vnodecovered; |
|
2656 |
ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp)); |
|
2657 |
||
2658 |
/* |
|
2659 |
* Purge all dnlc entries for this vfs. |
|
2660 |
*/ |
|
2661 |
(void) dnlc_purge_vfsp(vfsp, 0); |
|
2662 |
||
2663 |
/* For forcible umount, skip VFS_SYNC() since it may hang */ |
|
2664 |
if ((flag & MS_FORCE) == 0) |
|
2665 |
(void) VFS_SYNC(vfsp, 0, cr); |
|
2666 |
||
2667 |
/* |
|
2668 |
* Lock the vfs to maintain fs status quo during unmount. This |
|
2669 |
* has to be done after the sync because ufs_update tries to acquire |
|
2670 |
* the vfs_reflock. |
|
2671 |
*/ |
|
2672 |
vfs_lock_wait(vfsp); |
|
2673 |
||
2674 |
if (error = VFS_UNMOUNT(vfsp, flag, cr)) { |
|
2675 |
vfs_unlock(vfsp); |
|
2676 |
if (coveredvp != NULL) |
|
2677 |
vn_vfsunlock(coveredvp); |
|
2678 |
} else if (coveredvp != NULL) { |
|
1488 | 2679 |
teardown_vopstats(vfsp); |
0 | 2680 |
/* |
2681 |
* vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered) |
|
2682 |
* when it frees vfsp so we do a VN_HOLD() so we can |
|
2683 |
* continue to use coveredvp afterwards. |
|
2684 |
*/ |
|
2685 |
VN_HOLD(coveredvp); |
|
2686 |
vfs_remove(vfsp); |
|
2687 |
vn_vfsunlock(coveredvp); |
|
2688 |
VN_RELE(coveredvp); |
|
2689 |
} else { |
|
1488 | 2690 |
teardown_vopstats(vfsp); |
0 | 2691 |
/* |
2692 |
* Release the reference to vfs that is not linked |
|
2693 |
* into the name space. |
|
2694 |
*/ |
|
2695 |
vfs_unlock(vfsp); |
|
2696 |
VFS_RELE(vfsp); |
|
2697 |
} |
|
2698 |
return (error); |
|
2699 |
} |
|
2700 |
||
2701 |
||
2702 |
/* |
|
2703 |
* Vfs_unmountall() is called by uadmin() to unmount all |
|
2704 |
* mounted file systems (except the root file system) during shutdown. |
|
2705 |
* It follows the existing locking protocol when traversing the vfs list |
|
2706 |
* to sync and unmount vfses. Even though there should be no |
|
2707 |
* other thread running while the system is shutting down, it is prudent |
|
2708 |
* to still follow the locking protocol. |
|
2709 |
*/ |
|
2710 |
void |
|
2711 |
vfs_unmountall(void) |
|
2712 |
{ |
|
2713 |
struct vfs *vfsp; |
|
2714 |
struct vfs *prev_vfsp = NULL; |
|
2715 |
int error; |
|
2716 |
||
2717 |
/* |
|
2718 |
* Toss all dnlc entries now so that the per-vfs sync |
|
2719 |
* and unmount operations don't have to slog through |
|
2720 |
* a bunch of uninteresting vnodes over and over again. |
|
2721 |
*/ |
|
2722 |
dnlc_purge(); |
|
2723 |
||
2724 |
vfs_list_lock(); |
|
2725 |
for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) { |
|
2726 |
prev_vfsp = vfsp->vfs_prev; |
|
2727 |
||
2728 |
if (vfs_lock(vfsp) != 0) |
|
2729 |
continue; |
|
2730 |
error = vn_vfswlock(vfsp->vfs_vnodecovered); |
|
2731 |
vfs_unlock(vfsp); |
|
2732 |
if (error) |
|
2733 |
continue; |
|
2734 |
||
2735 |
vfs_list_unlock(); |
|
2736 |
||
2737 |
(void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED()); |
|
2738 |
(void) dounmount(vfsp, 0, CRED()); |
|
2739 |
||
2740 |
/* |
|
2741 |
* Since we dropped the vfslist lock above we must |
|
2742 |
* verify that next_vfsp still exists, else start over. |
|
2743 |
*/ |
|
2744 |
vfs_list_lock(); |
|
2745 |
for (vfsp = rootvfs->vfs_prev; |
|
2746 |
vfsp != rootvfs; vfsp = vfsp->vfs_prev) |
|
2747 |
if (vfsp == prev_vfsp) |
|
2748 |
break; |
|
2749 |
if (vfsp == rootvfs && prev_vfsp != rootvfs) |
|
2750 |
prev_vfsp = rootvfs->vfs_prev; |
|
2751 |
} |
|
2752 |
vfs_list_unlock(); |
|
2753 |
} |
|
2754 |
||
2755 |
/* |
|
2756 |
* Called to add an entry to the end of the vfs mount in progress list |
|
2757 |
*/ |
|
2758 |
void |
|
2759 |
vfs_addmip(dev_t dev, struct vfs *vfsp) |
|
2760 |
{ |
|
2761 |
struct ipmnt *mipp; |
|
2762 |
||
2763 |
mipp = (struct ipmnt *)kmem_alloc(sizeof (struct ipmnt), KM_SLEEP); |
|
2764 |
mipp->mip_next = NULL; |
|
2765 |
mipp->mip_dev = dev; |
|
2766 |
mipp->mip_vfsp = vfsp; |
|
2767 |
mutex_enter(&vfs_miplist_mutex); |
|
2768 |
if (vfs_miplist_end != NULL) |
|
2769 |
vfs_miplist_end->mip_next = mipp; |
|
2770 |
else |
|
2771 |
vfs_miplist = mipp; |
|
2772 |
vfs_miplist_end = mipp; |
|
2773 |
mutex_exit(&vfs_miplist_mutex); |
|
2774 |
} |
|
2775 |
||
2776 |
/* |
|
2777 |
* Called to remove an entry from the mount in progress list |
|
2778 |
* Either because the mount completed or it failed. |
|
2779 |
*/ |
|
2780 |
void |
|
2781 |
vfs_delmip(struct vfs *vfsp) |
|
2782 |
{ |
|
2783 |
struct ipmnt *mipp, *mipprev; |
|
2784 |
||
2785 |
mutex_enter(&vfs_miplist_mutex); |
|
2786 |
mipprev = NULL; |
|
2787 |
for (mipp = vfs_miplist; |
|
2788 |
mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) { |
|
2789 |
mipprev = mipp; |
|
2790 |
} |
|
2791 |
if (mipp == NULL) |
|
2792 |
return; /* shouldn't happen */ |
|
2793 |
if (mipp == vfs_miplist_end) |
|
2794 |
vfs_miplist_end = mipprev; |
|
2795 |
if (mipprev == NULL) |
|
2796 |
vfs_miplist = mipp->mip_next; |
|
2797 |
else |
|
2798 |
mipprev->mip_next = mipp->mip_next; |
|
2799 |
mutex_exit(&vfs_miplist_mutex); |
|
2800 |
kmem_free(mipp, sizeof (struct ipmnt)); |
|
2801 |
} |
|
2802 |
||
2803 |
/* |
|
2804 |
* vfs_add is called by a specific filesystem's mount routine to add |
|
2805 |
* the new vfs into the vfs list/hash and to cover the mounted-on vnode. |
|
2806 |
* The vfs should already have been locked by the caller. |
|
2807 |
* |
|
2808 |
* coveredvp is NULL if this is the root. |
|
2809 |
*/ |
|
2810 |
void |
|
2811 |
vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag) |
|
2812 |
{ |
|
2813 |
int newflag; |
|
2814 |
||
2815 |
ASSERT(vfs_lock_held(vfsp)); |
|
2816 |
VFS_HOLD(vfsp); |
|
2817 |
newflag = vfsp->vfs_flag; |
|
2818 |
if (mflag & MS_RDONLY) |
|
2819 |
newflag |= VFS_RDONLY; |
|
2820 |
else |
|
2821 |
newflag &= ~VFS_RDONLY; |
|
2822 |
if (mflag & MS_NOSUID) |
|
2823 |
newflag |= (VFS_NOSETUID|VFS_NODEVICES); |
|
2824 |
else |
|
2825 |
newflag &= ~(VFS_NOSETUID|VFS_NODEVICES); |
|
2826 |
if (mflag & MS_NOMNTTAB) |
|
2827 |
newflag |= VFS_NOMNTTAB; |
|
2828 |
else |
|
2829 |
newflag &= ~VFS_NOMNTTAB; |
|
2830 |
||
2831 |
if (coveredvp != NULL) { |
|
2832 |
ASSERT(vn_vfswlock_held(coveredvp)); |
|
2833 |
coveredvp->v_vfsmountedhere = vfsp; |
|
2834 |
VN_HOLD(coveredvp); |
|
2835 |
} |
|
2836 |
vfsp->vfs_vnodecovered = coveredvp; |
|
2837 |
vfsp->vfs_flag = newflag; |
|
2838 |
||
2839 |
vfs_list_add(vfsp); |
|
2840 |
} |
|
2841 |
||
2842 |
/* |
|
2843 |
* Remove a vfs from the vfs list, null out the pointer from the |
|
2844 |
* covered vnode to the vfs (v_vfsmountedhere), and null out the pointer |
|
2845 |
* from the vfs to the covered vnode (vfs_vnodecovered). Release the |
|
2846 |
* reference to the vfs and to the covered vnode. |
|
2847 |
* |
|
2848 |
* Called from dounmount after it's confirmed with the file system |
|
2849 |
* that the unmount is legal. |
|
2850 |
*/ |
|
2851 |
void |
|
2852 |
vfs_remove(struct vfs *vfsp) |
|
2853 |
{ |
|
2854 |
vnode_t *vp; |
|
2855 |
||
2856 |
ASSERT(vfs_lock_held(vfsp)); |
|
2857 |
||
2858 |
/* |
|
2859 |
* Can't unmount root. Should never happen because fs will |
|
2860 |
* be busy. |
|
2861 |
*/ |
|
2862 |
if (vfsp == rootvfs) |
|
2863 |
cmn_err(CE_PANIC, "vfs_remove: unmounting root"); |
|
2864 |
||
2865 |
vfs_list_remove(vfsp); |
|
2866 |
||
2867 |
/* |
|
2868 |
* Unhook from the file system name space. |
|
2869 |
*/ |
|
2870 |
vp = vfsp->vfs_vnodecovered; |
|
2871 |
ASSERT(vn_vfswlock_held(vp)); |
|
2872 |
vp->v_vfsmountedhere = NULL; |
|
2873 |
vfsp->vfs_vnodecovered = NULL; |
|
2874 |
VN_RELE(vp); |
|
2875 |
||
2876 |
/* |
|
2877 |
* Release lock and wakeup anybody waiting. |
|
2878 |
*/ |
|
2879 |
vfs_unlock(vfsp); |
|
2880 |
VFS_RELE(vfsp); |
|
2881 |
} |
|
2882 |
||
2883 |
/* |
|
2884 |
* Lock a filesystem to prevent access to it while mounting, |
|
2885 |
* unmounting and syncing. Return EBUSY immediately if lock |
|
2886 |
* can't be acquired. |
|
2887 |
*/ |
|
2888 |
int |
|
2889 |
vfs_lock(vfs_t *vfsp) |
|
2890 |
{ |
|
2891 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2892 |
||
2893 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
2894 |
if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER)) |
|
2895 |
return (0); |
|
2896 |
||
2897 |
vn_vfslocks_rele(vpvfsentry); |
|
2898 |
return (EBUSY); |
|
2899 |
} |
|
2900 |
||
2901 |
int |
|
2902 |
vfs_rlock(vfs_t *vfsp) |
|
2903 |
{ |
|
2904 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2905 |
||
2906 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
2907 |
||
2908 |
if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER)) |
|
2909 |
return (0); |
|
2910 |
||
2911 |
vn_vfslocks_rele(vpvfsentry); |
|
2912 |
return (EBUSY); |
|
2913 |
} |
|
2914 |
||
2915 |
void |
|
2916 |
vfs_lock_wait(vfs_t *vfsp) |
|
2917 |
{ |
|
2918 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2919 |
||
2920 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
2921 |
rwst_enter(&vpvfsentry->ve_lock, RW_WRITER); |
|
2922 |
} |
|
2923 |
||
2924 |
void |
|
2925 |
vfs_rlock_wait(vfs_t *vfsp) |
|
2926 |
{ |
|
2927 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2928 |
||
2929 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
2930 |
rwst_enter(&vpvfsentry->ve_lock, RW_READER); |
|
2931 |
} |
|
2932 |
||
2933 |
/* |
|
2934 |
* Unlock a locked filesystem. |
|
2935 |
*/ |
|
2936 |
void |
|
2937 |
vfs_unlock(vfs_t *vfsp) |
|
2938 |
{ |
|
2939 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2940 |
||
2941 |
/* |
|
2942 |
* vfs_unlock will mimic sema_v behaviour to fix 4748018. |
|
2943 |
* And these changes should remain for the patch changes as it is. |
|
2944 |
*/ |
|
2945 |
if (panicstr) |
|
2946 |
return; |
|
2947 |
||
2948 |
/* |
|
2949 |
* ve_refcount needs to be dropped twice here. |
|
2950 |
* 1. To release refernce after a call to vfs_locks_getlock() |
|
2951 |
* 2. To release the reference from the locking routines like |
|
2952 |
* vfs_rlock_wait/vfs_wlock_wait/vfs_wlock etc,. |
|
2953 |
*/ |
|
2954 |
||
2955 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
2956 |
vn_vfslocks_rele(vpvfsentry); |
|
2957 |
||
2958 |
rwst_exit(&vpvfsentry->ve_lock); |
|
2959 |
vn_vfslocks_rele(vpvfsentry); |
|
2960 |
} |
|
2961 |
||
2962 |
/* |
|
2963 |
* Utility routine that allows a filesystem to construct its |
|
2964 |
* fsid in "the usual way" - by munging some underlying dev_t and |
|
2965 |
* the filesystem type number into the 64-bit fsid. Note that |
|
2966 |
* this implicitly relies on dev_t persistence to make filesystem |
|
2967 |
* id's persistent. |
|
2968 |
* |
|
2969 |
* There's nothing to prevent an individual fs from constructing its |
|
2970 |
* fsid in a different way, and indeed they should. |
|
2971 |
* |
|
2972 |
* Since we want fsids to be 32-bit quantities (so that they can be |
|
2973 |
* exported identically by either 32-bit or 64-bit APIs, as well as |
|
2974 |
* the fact that fsid's are "known" to NFS), we compress the device |
|
2975 |
* number given down to 32-bits, and panic if that isn't possible. |
|
2976 |
*/ |
|
2977 |
void |
|
2978 |
vfs_make_fsid(fsid_t *fsi, dev_t dev, int val) |
|
2979 |
{ |
|
2980 |
if (!cmpldev((dev32_t *)&fsi->val[0], dev)) |
|
2981 |
panic("device number too big for fsid!"); |
|
2982 |
fsi->val[1] = val; |
|
2983 |
} |
|
2984 |
||
2985 |
int |
|
2986 |
vfs_lock_held(vfs_t *vfsp) |
|
2987 |
{ |
|
2988 |
int held; |
|
2989 |
vn_vfslocks_entry_t *vpvfsentry; |
|
2990 |
||
2991 |
/* |
|
2992 |
* vfs_lock_held will mimic sema_held behaviour |
|
2993 |
* if panicstr is set. And these changes should remain |
|
2994 |
* for the patch changes as it is. |
|
2995 |
*/ |
|
2996 |
if (panicstr) |
|
2997 |
return (1); |
|
2998 |
||
2999 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
3000 |
held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER); |
|
3001 |
||
3002 |
vn_vfslocks_rele(vpvfsentry); |
|
3003 |
return (held); |
|
3004 |
} |
|
3005 |
||
3006 |
struct _kthread * |
|
3007 |
vfs_lock_owner(vfs_t *vfsp) |
|
3008 |
{ |
|
3009 |
struct _kthread *owner; |
|
3010 |
vn_vfslocks_entry_t *vpvfsentry; |
|
3011 |
||
3012 |
/* |
|
3013 |
* vfs_wlock_held will mimic sema_held behaviour |
|
3014 |
* if panicstr is set. And these changes should remain |
|
3015 |
* for the patch changes as it is. |
|
3016 |
*/ |
|
3017 |
if (panicstr) |
|
3018 |
return (NULL); |
|
3019 |
||
3020 |
vpvfsentry = vn_vfslocks_getlock(vfsp); |
|
3021 |
owner = rwst_owner(&vpvfsentry->ve_lock); |
|
3022 |
||
3023 |
vn_vfslocks_rele(vpvfsentry); |
|
3024 |
return (owner); |
|
3025 |
} |
|
3026 |
||
3027 |
/* |
|
3028 |
* vfs list locking. |
|
3029 |
* |
|
3030 |
* Rather than manipulate the vfslist lock directly, we abstract into lock |
|
3031 |
* and unlock routines to allow the locking implementation to be changed for |
|
3032 |
* clustering. |
|
3033 |
* |
|
3034 |
* Whenever the vfs list is modified through its hash links, the overall list |
|
3035 |
* lock must be obtained before locking the relevant hash bucket. But to see |
|
3036 |
* whether a given vfs is on the list, it suffices to obtain the lock for the |
|
3037 |
* hash bucket without getting the overall list lock. (See getvfs() below.) |
|
3038 |
*/ |
|
3039 |
||
3040 |
void |
|
3041 |
vfs_list_lock() |
|
3042 |
{ |
|
3043 |
rw_enter(&vfslist, RW_WRITER); |
|
3044 |
} |
|
3045 |
||
3046 |
void |
|
3047 |
vfs_list_read_lock() |
|
3048 |
{ |
|
3049 |
rw_enter(&vfslist, RW_READER); |
|
3050 |
} |
|
3051 |
||
3052 |
void |
|
3053 |
vfs_list_unlock() |
|
3054 |
{ |
|
3055 |
rw_exit(&vfslist); |
|
3056 |
} |
|
3057 |
||
3058 |
/* |
|
3059 |
* Low level worker routines for adding entries to and removing entries from |
|
3060 |
* the vfs list. |
|
3061 |
*/ |
|
3062 |
||
3063 |
static void |
|
3064 |
vfs_hash_add(struct vfs *vfsp, int insert_at_head) |
|
3065 |
{ |
|
3066 |
int vhno; |
|
3067 |
struct vfs **hp; |
|
3068 |
dev_t dev; |
|
3069 |
||
3070 |
ASSERT(RW_WRITE_HELD(&vfslist)); |
|
3071 |
||
3072 |
dev = expldev(vfsp->vfs_fsid.val[0]); |
|
3073 |
vhno = VFSHASH(getmajor(dev), getminor(dev)); |
|
3074 |
||
3075 |
mutex_enter(&rvfs_list[vhno].rvfs_lock); |
|
3076 |
||
3077 |
/* |
|
3078 |
* Link into the hash table, inserting it at the end, so that LOFS |
|
3079 |
* with the same fsid as UFS (or other) file systems will not hide the |
|
3080 |
* UFS. |
|
3081 |
*/ |
|
3082 |
if (insert_at_head) { |
|
3083 |
vfsp->vfs_hash = rvfs_list[vhno].rvfs_head; |
|
3084 |
rvfs_list[vhno].rvfs_head = vfsp; |
|
3085 |
} else { |
|
3086 |
for (hp = &rvfs_list[vhno].rvfs_head; *hp != NULL; |
|
3087 |
hp = &(*hp)->vfs_hash) |
|
3088 |
continue; |
|
3089 |
/* |
|
3090 |
* hp now contains the address of the pointer to update |
|
3091 |
* to effect the insertion. |
|
3092 |
*/ |
|
3093 |
vfsp->vfs_hash = NULL; |
|
3094 |
*hp = vfsp; |
|
3095 |
} |
|
3096 |
||
3097 |
rvfs_list[vhno].rvfs_len++; |
|
3098 |
mutex_exit(&rvfs_list[vhno].rvfs_lock); |
|
3099 |
} |
|
3100 |
||
3101 |
||
3102 |
static void |
|
3103 |
vfs_hash_remove(struct vfs *vfsp) |
|
3104 |
{ |
|
3105 |
int vhno; |
|
3106 |
struct vfs *tvfsp; |
|
3107 |
dev_t dev; |
|
3108 |
||
3109 |
ASSERT(RW_WRITE_HELD(&vfslist)); |
|
3110 |
||
3111 |
dev = expldev(vfsp->vfs_fsid.val[0]); |
|
3112 |
vhno = VFSHASH(getmajor(dev), getminor(dev)); |
|
3113 |
||
3114 |
mutex_enter(&rvfs_list[vhno].rvfs_lock); |
|
3115 |
||
3116 |
/* |
|
3117 |
* Remove from hash. |
|
3118 |
*/ |
|
3119 |
if (rvfs_list[vhno].rvfs_head == vfsp) { |
|
3120 |
rvfs_list[vhno].rvfs_head = vfsp->vfs_hash; |
|
3121 |
rvfs_list[vhno].rvfs_len--; |
|
3122 |
goto foundit; |
|
3123 |
} |
|
3124 |
for (tvfsp = rvfs_list[vhno].rvfs_head; tvfsp != NULL; |
|
3125 |
tvfsp = tvfsp->vfs_hash) { |
|
3126 |
if (tvfsp->vfs_hash == vfsp) { |
|
3127 |
tvfsp->vfs_hash = vfsp->vfs_hash; |
|
3128 |
rvfs_list[vhno].rvfs_len--; |
|
3129 |
goto foundit; |
|
3130 |
} |
|
3131 |
} |
|
3132 |
cmn_err(CE_WARN, "vfs_list_remove: vfs not found in hash"); |
|
3133 |
||
3134 |
foundit: |
|
3135 |
||
3136 |
mutex_exit(&rvfs_list[vhno].rvfs_lock); |
|
3137 |
} |
|
3138 |
||
3139 |
||
3140 |
void |
|
3141 |
vfs_list_add(struct vfs *vfsp) |
|
3142 |
{ |
|
3143 |
zone_t *zone; |
|
3144 |
||
3145 |
/* |
|
3146 |
* The zone that owns the mount is the one that performed the mount. |
|
3147 |
* Note that this isn't necessarily the same as the zone mounted into. |
|
3148 |
* The corresponding zone_rele() will be done when the vfs_t is |
|
3149 |
* being free'd. |
|
3150 |
*/ |
|
3151 |
vfsp->vfs_zone = curproc->p_zone; |
|
3152 |
zone_hold(vfsp->vfs_zone); |
|
3153 |
||
3154 |
/* |
|
3155 |
* Find the zone mounted into, and put this mount on its vfs list. |
|
3156 |
*/ |
|
3157 |
zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); |
|
3158 |
ASSERT(zone != NULL); |
|
3159 |
/* |
|
3160 |
* Special casing for the root vfs. This structure is allocated |
|
3161 |
* statically and hooked onto rootvfs at link time. During the |
|
3162 |
* vfs_mountroot call at system startup time, the root file system's |
|
3163 |
* VFS_MOUNTROOT routine will call vfs_add with this root vfs struct |
|
3164 |
* as argument. The code below must detect and handle this special |
|
3165 |
* case. The only apparent justification for this special casing is |
|
3166 |
* to ensure that the root file system appears at the head of the |
|
3167 |
* list. |
|
3168 |
* |
|
3169 |
* XXX: I'm assuming that it's ok to do normal list locking when |
|
3170 |
* adding the entry for the root file system (this used to be |
|
3171 |
* done with no locks held). |
|
3172 |
*/ |
|
3173 |
vfs_list_lock(); |
|
3174 |
/* |
|
3175 |
* Link into the vfs list proper. |
|
3176 |
*/ |
|
3177 |
if (vfsp == &root) { |
|
3178 |
/* |
|
3179 |
* Assert: This vfs is already on the list as its first entry. |
|
3180 |
* Thus, there's nothing to do. |
|
3181 |
*/ |
|
3182 |
ASSERT(rootvfs == vfsp); |
|
3183 |
/* |
|
3184 |
* Add it to the head of the global zone's vfslist. |
|
3185 |
*/ |
|
3186 |
ASSERT(zone == global_zone); |
|
3187 |
ASSERT(zone->zone_vfslist == NULL); |
|
3188 |
zone->zone_vfslist = vfsp; |
|
3189 |
} else { |
|
3190 |
/* |
|
3191 |
* Link to end of list using vfs_prev (as rootvfs is now a |
|
3192 |
* doubly linked circular list) so list is in mount order for |
|
3193 |
* mnttab use. |
|
3194 |
*/ |
|
3195 |
rootvfs->vfs_prev->vfs_next = vfsp; |
|
3196 |
vfsp->vfs_prev = rootvfs->vfs_prev; |
|
3197 |
rootvfs->vfs_prev = vfsp; |
|
3198 |
vfsp->vfs_next = rootvfs; |
|
3199 |
||
3200 |
/* |
|
3201 |
* Do it again for the zone-private list (which may be NULL). |
|
3202 |
*/ |
|
3203 |
if (zone->zone_vfslist == NULL) { |
|
3204 |
ASSERT(zone != global_zone); |
|
3205 |
zone->zone_vfslist = vfsp; |
|
3206 |
} else { |
|
3207 |
zone->zone_vfslist->vfs_zone_prev->vfs_zone_next = vfsp; |
|
3208 |
vfsp->vfs_zone_prev = zone->zone_vfslist->vfs_zone_prev; |
|
3209 |
zone->zone_vfslist->vfs_zone_prev = vfsp; |
|
3210 |
vfsp->vfs_zone_next = zone->zone_vfslist; |
|
3211 |
} |
|
3212 |
} |
|
3213 |
||
3214 |
/* |
|
3215 |
* Link into the hash table, inserting it at the end, so that LOFS |
|
3216 |
* with the same fsid as UFS (or other) file systems will not hide |
|
3217 |
* the UFS. |
|
3218 |
*/ |
|
3219 |
vfs_hash_add(vfsp, 0); |
|
3220 |
||
3221 |
/* |
|
3222 |
* update the mnttab modification time |
|
3223 |
*/ |
|
3224 |
vfs_mnttab_modtimeupd(); |
|
3225 |
vfs_list_unlock(); |
|
3226 |
zone_rele(zone); |
|
3227 |
} |
|
3228 |
||
3229 |
void |
|
3230 |
vfs_list_remove(struct vfs *vfsp) |
|
3231 |
{ |
|
3232 |
zone_t *zone; |
|
3233 |
||
3234 |
zone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); |
|
3235 |
ASSERT(zone != NULL); |
|
3236 |
/* |
|
3237 |
* Callers are responsible for preventing attempts to unmount the |
|
3238 |
* root. |
|
3239 |
*/ |
|
3240 |
ASSERT(vfsp != rootvfs); |
|
3241 |
||
3242 |
vfs_list_lock(); |
|
3243 |
||
3244 |
/* |
|
3245 |
* Remove from hash. |
|
3246 |
*/ |
|
3247 |
vfs_hash_remove(vfsp); |
|
3248 |
||
3249 |
/* |
|
3250 |
* Remove from vfs list. |
|
3251 |
*/ |
|
3252 |
vfsp->vfs_prev->vfs_next = vfsp->vfs_next; |
|
3253 |
vfsp->vfs_next->vfs_prev = vfsp->vfs_prev; |
|
3254 |
vfsp->vfs_next = vfsp->vfs_prev = NULL; |
|
3255 |
||
3256 |
/* |
|
3257 |
* Remove from zone-specific vfs list. |
|
3258 |
*/ |
|
3259 |
if (zone->zone_vfslist == vfsp) |
|
3260 |
zone->zone_vfslist = vfsp->vfs_zone_next; |
|
3261 |
||
3262 |
if (vfsp->vfs_zone_next == vfsp) { |
|
3263 |
ASSERT(vfsp->vfs_zone_prev == vfsp); |
|
3264 |
ASSERT(zone->zone_vfslist == vfsp); |
|
3265 |
zone->zone_vfslist = NULL; |
|
3266 |
} |
|
3267 |
||
3268 |
vfsp->vfs_zone_prev->vfs_zone_next = vfsp->vfs_zone_next; |
|
3269 |
vfsp->vfs_zone_next->vfs_zone_prev = vfsp->vfs_zone_prev; |
|
3270 |
vfsp->vfs_zone_next = vfsp->vfs_zone_prev = NULL; |
|
3271 |
||
3272 |
/* |
|
3273 |
* update the mnttab modification time |
|
3274 |
*/ |
|
3275 |
vfs_mnttab_modtimeupd(); |
|
3276 |
vfs_list_unlock(); |
|
3277 |
zone_rele(zone); |
|
3278 |
} |
|
3279 |
||
3280 |
struct vfs * |
|
3281 |
getvfs(fsid_t *fsid) |
|
3282 |
{ |
|
3283 |
struct vfs *vfsp; |
|
3284 |
int val0 = fsid->val[0]; |
|
3285 |
int val1 = fsid->val[1]; |
|
3286 |
dev_t dev = expldev(val0); |
|
3287 |
int vhno = VFSHASH(getmajor(dev), getminor(dev)); |
|
3288 |
kmutex_t *hmp = &rvfs_list[vhno].rvfs_lock; |
|
3289 |
||
3290 |
mutex_enter(hmp); |
|
3291 |
for (vfsp = rvfs_list[vhno].rvfs_head; vfsp; vfsp = vfsp->vfs_hash) { |
|
3292 |
if (vfsp->vfs_fsid.val[0] == val0 && |
|
3293 |
vfsp->vfs_fsid.val[1] == val1) { |
|
3294 |
VFS_HOLD(vfsp); |
|
3295 |
mutex_exit(hmp); |
|
3296 |
return (vfsp); |
|
3297 |
} |
|
3298 |
} |
|
3299 |
mutex_exit(hmp); |
|
3300 |
return (NULL); |
|
3301 |
} |
|
3302 |
||
3303 |
/* |
|
3304 |
* Search the vfs mount in progress list for a specified device/vfs entry. |
|
3305 |
* Returns 0 if the first entry in the list that the device matches has the |
|
3306 |
* given vfs pointer as well. If the device matches but a different vfs |
|
3307 |
* pointer is encountered in the list before the given vfs pointer then |
|
3308 |
* a 1 is returned. |
|
3309 |
*/ |
|
3310 |
||
3311 |
int |
|
3312 |
vfs_devmounting(dev_t dev, struct vfs *vfsp) |
|
3313 |
{ |
|
3314 |
int retval = 0; |
|
3315 |
struct ipmnt *mipp; |
|
3316 |
||
3317 |
mutex_enter(&vfs_miplist_mutex); |
|
3318 |
for (mipp = vfs_miplist; mipp != NULL; mipp = mipp->mip_next) { |
|
3319 |
if (mipp->mip_dev == dev) { |
|
3320 |
if (mipp->mip_vfsp != vfsp) |
|
3321 |
retval = 1; |
|
3322 |
break; |
|
3323 |
} |
|
3324 |
} |
|
3325 |
mutex_exit(&vfs_miplist_mutex); |
|
3326 |
return (retval); |
|
3327 |
} |
|
3328 |
||
3329 |
/* |
|
3330 |
* Search the vfs list for a specified device. Returns 1, if entry is found |
|
3331 |
* or 0 if no suitable entry is found. |
|
3332 |
*/ |
|
3333 |
||
3334 |
int |
|
3335 |
vfs_devismounted(dev_t dev) |
|
3336 |
{ |
|
3337 |
struct vfs *vfsp; |
|
3338 |
int found; |
|
3339 |
||
3340 |
vfs_list_read_lock(); |
|
3341 |
vfsp = rootvfs; |
|
3342 |
found = 0; |
|
3343 |
do { |
|
3344 |
if (vfsp->vfs_dev == dev) { |
|
3345 |
found = 1; |
|
3346 |
break; |
|
3347 |
} |
|
3348 |
vfsp = vfsp->vfs_next; |
|
3349 |
} while (vfsp != rootvfs); |
|
3350 |
||
3351 |
vfs_list_unlock(); |
|
3352 |
return (found); |
|
3353 |
} |
|
3354 |
||
3355 |
/* |
|
3356 |
* Search the vfs list for a specified device. Returns a pointer to it |
|
3357 |
* or NULL if no suitable entry is found. The caller of this routine |
|
3358 |
* is responsible for releasing the returned vfs pointer. |
|
3359 |
*/ |
|
3360 |
struct vfs * |
|
3361 |
vfs_dev2vfsp(dev_t dev) |
|
3362 |
{ |
|
3363 |
struct vfs *vfsp; |
|
3364 |
int found; |
|
3365 |
||
3366 |
vfs_list_read_lock(); |
|
3367 |
vfsp = rootvfs; |
|
3368 |
found = 0; |
|
3369 |
do { |
|
3370 |
/* |
|
3371 |
* The following could be made more efficient by making |
|
3372 |
* the entire loop use vfs_zone_next if the call is from |
|
3373 |
* a zone. The only callers, however, ustat(2) and |
|
3374 |
* umount2(2), don't seem to justify the added |
|
3375 |
* complexity at present. |
|
3376 |
*/ |
|
3377 |
if (vfsp->vfs_dev == dev && |
|
3378 |
ZONE_PATH_VISIBLE(refstr_value(vfsp->vfs_mntpt), |
|
3379 |
curproc->p_zone)) { |
|
3380 |
VFS_HOLD(vfsp); |
|
3381 |
found = 1; |
|
3382 |
break; |
|
3383 |
} |
|
3384 |
vfsp = vfsp->vfs_next; |
|
3385 |
} while (vfsp != rootvfs); |
|
3386 |
vfs_list_unlock(); |
|
3387 |
return (found ? vfsp: NULL); |
|
3388 |
} |
|
3389 |
||
3390 |
/* |
|
3391 |
* Search the vfs list for a specified mntpoint. Returns a pointer to it |
|
3392 |
* or NULL if no suitable entry is found. The caller of this routine |
|
3393 |
* is responsible for releasing the returned vfs pointer. |
|
3394 |
* |
|
3395 |
* Note that if multiple mntpoints match, the last one matching is |
|
3396 |
* returned in an attempt to return the "top" mount when overlay |
|
3397 |
* mounts are covering the same mount point. This is accomplished by starting |
|
3398 |
* at the end of the list and working our way backwards, stopping at the first |
|
3399 |
* matching mount. |
|
3400 |
*/ |
|
3401 |
struct vfs * |
|
3402 |
vfs_mntpoint2vfsp(const char *mp) |
|
3403 |
{ |
|
3404 |
struct vfs *vfsp; |
|
3405 |
struct vfs *retvfsp = NULL; |
|
3406 |
zone_t *zone = curproc->p_zone; |
|
3407 |
struct vfs *list; |
|
3408 |
||
3409 |
vfs_list_read_lock(); |
|
3410 |
if (getzoneid() == GLOBAL_ZONEID) { |
|
3411 |
/* |
|
3412 |
* The global zone may see filesystems in any zone. |
|
3413 |
*/ |
|
3414 |
vfsp = rootvfs->vfs_prev; |
|
3415 |
do { |
|
3416 |
if (strcmp(refstr_value(vfsp->vfs_mntpt), mp) == 0) { |
|
3417 |
retvfsp = vfsp; |
|
3418 |
break; |
|
3419 |
} |
|
3420 |
vfsp = vfsp->vfs_prev; |
|
3421 |
} while (vfsp != rootvfs->vfs_prev); |
|
3422 |
} else if ((list = zone->zone_vfslist) != NULL) { |
|
3423 |
const char *mntpt; |
|
3424 |
||
3425 |
vfsp = list->vfs_zone_prev; |
|
3426 |
do { |
|
3427 |
mntpt = refstr_value(vfsp->vfs_mntpt); |
|
3428 |
mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); |
|
3429 |
if (strcmp(mntpt, mp) == 0) { |
|
3430 |
retvfsp = vfsp; |
|
3431 |
break; |
|
3432 |
} |
|
3433 |
vfsp = vfsp->vfs_zone_prev; |
|
3434 |
} while (vfsp != list->vfs_zone_prev); |
|
3435 |
} |
|
3436 |
if (retvfsp) |
|
3437 |
VFS_HOLD(retvfsp); |
|
3438 |
vfs_list_unlock(); |
|
3439 |
return (retvfsp); |
|
3440 |
} |
|
3441 |
||
3442 |
/* |
|
3443 |
* Search the vfs list for a specified vfsops. |
|
3444 |
* if vfs entry is found then return 1, else 0. |
|
3445 |
*/ |
|
3446 |
int |
|
3447 |
vfs_opsinuse(vfsops_t *ops) |
|
3448 |
{ |
|
3449 |
struct vfs *vfsp; |
|
3450 |
int found; |
|
3451 |
||
3452 |
vfs_list_read_lock(); |
|
3453 |
vfsp = rootvfs; |
|
3454 |
found = 0; |
|
3455 |
do { |
|
3456 |
if (vfs_getops(vfsp) == ops) { |
|
3457 |
found = 1; |
|
3458 |
break; |
|
3459 |
} |
|
3460 |
vfsp = vfsp->vfs_next; |
|
3461 |
} while (vfsp != rootvfs); |
|
3462 |
vfs_list_unlock(); |
|
3463 |
return (found); |
|
3464 |
} |
|
3465 |
||
3466 |
/* |
|
3467 |
* Allocate an entry in vfssw for a file system type |
|
3468 |
*/ |
|
3469 |
struct vfssw * |
|
3470 |
allocate_vfssw(char *type) |
|
3471 |
{ |
|
3472 |
struct vfssw *vswp; |
|
3473 |
||
3474 |
if (type[0] == '\0' || strlen(type) + 1 > _ST_FSTYPSZ) { |
|
3475 |
/* |
|
3476 |
* The vfssw table uses the empty string to identify an |
|
3477 |
* available entry; we cannot add any type which has |
|
3478 |
* a leading NUL. The string length is limited to |
|
3479 |
* the size of the st_fstype array in struct stat. |
|
3480 |
*/ |
|
3481 |
return (NULL); |
|
3482 |
} |
|
3483 |
||
3484 |
ASSERT(VFSSW_WRITE_LOCKED()); |
|
3485 |
for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) |
|
3486 |
if (!ALLOCATED_VFSSW(vswp)) { |
|
3487 |
vswp->vsw_name = kmem_alloc(strlen(type) + 1, KM_SLEEP); |
|
3488 |
(void) strcpy(vswp->vsw_name, type); |
|
3489 |
ASSERT(vswp->vsw_count == 0); |
|
3490 |
vswp->vsw_count = 1; |
|
3491 |
mutex_init(&vswp->vsw_lock, NULL, MUTEX_DEFAULT, NULL); |
|
3492 |
return (vswp); |
|
3493 |
} |
|
3494 |
return (NULL); |
|
3495 |
} |
|
3496 |
||
3497 |
/*
 * Impose additional layer of translation between vfstype names
 * and module names in the filesystem.
 *
 * "proc" maps to "procfs", "fd" maps to "fdfs", and any name beginning
 * with "nfs" maps to "nfs".  Every other name is returned unchanged (the
 * caller's own pointer is handed back).
 */
static char *
vfs_to_modname(char *vfstype)
{
	if (strcmp(vfstype, "proc") == 0)
		return ("procfs");

	if (strcmp(vfstype, "fd") == 0)
		return ("fdfs");

	/* Prefix match: covers "nfs" itself and names that extend it. */
	if (strncmp(vfstype, "nfs", 3) == 0)
		return ("nfs");

	return (vfstype);
}
|
3514 |
||
3515 |
/* |
|
3516 |
* Find a vfssw entry given a file system type name. |
|
3517 |
* Try to autoload the filesystem if it's not found. |
|
3518 |
* If it's installed, return the vfssw locked to prevent unloading. |
|
3519 |
*/ |
|
3520 |
struct vfssw * |
|
3521 |
vfs_getvfssw(char *type) |
|
3522 |
{ |
|
3523 |
struct vfssw *vswp; |
|
3524 |
char *modname; |
|
3525 |
||
3526 |
RLOCK_VFSSW(); |
|
3527 |
vswp = vfs_getvfsswbyname(type); |
|
3528 |
modname = vfs_to_modname(type); |
|
3529 |
||
3530 |
if (rootdir == NULL) { |
|
3531 |
/* |
|
3532 |
* If we haven't yet loaded the root file system, then our |
|
3533 |
* _init won't be called until later. Allocate vfssw entry, |
|
3534 |
* because mod_installfs won't be called. |
|
3535 |
*/ |
|
3536 |
if (vswp == NULL) { |
|
3537 |
RUNLOCK_VFSSW(); |
|
3538 |
WLOCK_VFSSW(); |
|
3539 |
if ((vswp = vfs_getvfsswbyname(type)) == NULL) { |
|
3540 |
if ((vswp = allocate_vfssw(type)) == NULL) { |
|
3541 |
WUNLOCK_VFSSW(); |
|
3542 |
return (NULL); |
|
3543 |
} |
|
3544 |
} |
|
3545 |
WUNLOCK_VFSSW(); |
|
3546 |
RLOCK_VFSSW(); |
|
3547 |
} |
|
3548 |
if (!VFS_INSTALLED(vswp)) { |
|
3549 |
RUNLOCK_VFSSW(); |
|
3550 |
(void) modloadonly("fs", modname); |
|
3551 |
} else |
|
3552 |
RUNLOCK_VFSSW(); |
|
3553 |
return (vswp); |
|
3554 |
} |
|
3555 |
||
3556 |
/* |
|
3557 |
* Try to load the filesystem. Before calling modload(), we drop |
|
3558 |
* our lock on the VFS switch table, and pick it up after the |
|
3559 |
* module is loaded. However, there is a potential race: the |
|
3560 |
* module could be unloaded after the call to modload() completes |
|
3561 |
* but before we pick up the lock and drive on. Therefore, |
|
3562 |
* we keep reloading the module until we've loaded the module |
|
3563 |
* _and_ we have the lock on the VFS switch table. |
|
3564 |
*/ |
|
3565 |
while (vswp == NULL || !VFS_INSTALLED(vswp)) { |
|
3566 |
RUNLOCK_VFSSW(); |
|
3567 |
if (modload("fs", modname) == -1) |
|
3568 |
return (NULL); |
|
3569 |
RLOCK_VFSSW(); |
|
3570 |
if (vswp == NULL) |
|
3571 |
if ((vswp = vfs_getvfsswbyname(type)) == NULL) |
|
3572 |
break; |
|
3573 |
} |
|
3574 |
RUNLOCK_VFSSW(); |
|
3575 |
||
3576 |
return (vswp); |
|
3577 |
} |
|
3578 |
||
3579 |
/* |
|
3580 |
* Find a vfssw entry given a file system type name. |
|
3581 |
*/ |
|
3582 |
struct vfssw * |
|
3583 |
vfs_getvfsswbyname(char *type) |
|
3584 |
{ |
|
3585 |
struct vfssw *vswp; |
|
3586 |
||
3587 |
ASSERT(VFSSW_LOCKED()); |
|
3588 |
if (type == NULL || *type == '\0') |
|
3589 |
return (NULL); |
|
3590 |
||
3591 |
for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { |
|
3592 |
if (strcmp(type, vswp->vsw_name) == 0) { |
|
3593 |
vfs_refvfssw(vswp); |
|
3594 |
return (vswp); |
|
3595 |
} |
|
3596 |
} |
|
3597 |
||
3598 |
return (NULL); |
|
3599 |
} |
|
3600 |
||
3601 |
/* |
|
3602 |
* Find a vfssw entry given a set of vfsops. |
|
3603 |
*/ |
|
3604 |
struct vfssw * |
|
3605 |
vfs_getvfsswbyvfsops(vfsops_t *vfsops) |
|
3606 |
{ |
|
3607 |
struct vfssw *vswp; |
|
3608 |
||
3609 |
RLOCK_VFSSW(); |
|
3610 |
for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { |
|
3611 |
if (ALLOCATED_VFSSW(vswp) && &vswp->vsw_vfsops == vfsops) { |
|
3612 |
vfs_refvfssw(vswp); |
|
3613 |
RUNLOCK_VFSSW(); |
|
3614 |
return (vswp); |
|
3615 |
} |
|
3616 |
} |
|
3617 |
RUNLOCK_VFSSW(); |
|
3618 |
||
3619 |
return (NULL); |
|
3620 |
} |
|
3621 |
||
3622 |
/* |
|
3623 |
* Reference a vfssw entry. |
|
3624 |
*/ |
|
3625 |
void |
|
3626 |
vfs_refvfssw(struct vfssw *vswp) |
|
3627 |
{ |
|
3628 |
||
3629 |
mutex_enter(&vswp->vsw_lock); |
|
3630 |
vswp->vsw_count++; |
|
3631 |
mutex_exit(&vswp->vsw_lock); |
|
3632 |
} |
|
3633 |
||
3634 |
/* |
|
3635 |
* Unreference a vfssw entry. |
|
3636 |
*/ |
|
3637 |
void |
|
3638 |
vfs_unrefvfssw(struct vfssw *vswp) |
|
3639 |
{ |
|
3640 |
||
3641 |
mutex_enter(&vswp->vsw_lock); |
|
3642 |
vswp->vsw_count--; |
|
3643 |
mutex_exit(&vswp->vsw_lock); |
|
3644 |
} |
|
3645 |
||
3646 |
int sync_timeout = 30; /* timeout for syncing a page during panic */ |
|
3647 |
int sync_timeleft; /* portion of sync_timeout remaining */ |
|
3648 |
||
3649 |
static int sync_retries = 20; /* number of retries when not making progress */ |
|
3650 |
static int sync_triesleft; /* portion of sync_retries remaining */ |
|
3651 |
||
3652 |
static pgcnt_t old_pgcnt, new_pgcnt; |
|
3653 |
static int new_bufcnt, old_bufcnt; |
|
3654 |
||
3655 |
/* |
|
3656 |
* Sync all of the mounted filesystems, and then wait for the actual i/o to |
|
3657 |
* complete. We wait by counting the number of dirty pages and buffers, |
|
3658 |
* pushing them out using bio_busy() and page_busy(), and then counting again. |
|
3659 |
* This routine is used during both the uadmin A_SHUTDOWN code as well as |
|
3660 |
* the SYNC phase of the panic code (see comments in panic.c). It should only |
|
3661 |
* be used after some higher-level mechanism has quiesced the system so that |
|
3662 |
* new writes are not being initiated while we are waiting for completion. |
|
3663 |
* |
|
3664 |
* To ensure finite running time, our algorithm uses two timeout mechanisms: |
|
3665 |
* sync_timeleft (a timer implemented by the omnipresent deadman() cyclic), and |
|
3666 |
* sync_triesleft (a progress counter used by the vfs_syncall() loop below). |
|
3667 |
* Together these ensure that syncing completes if our i/o paths are stuck. |
|
3668 |
* The counters are declared above so they can be found easily in the debugger. |
|
3669 |
* |
|
3670 |
* The sync_timeleft counter is reset by bio_busy() and page_busy() using the |
|
3671 |
* vfs_syncprogress() subroutine whenever we make progress through the lists of |
|
3672 |
* pages and buffers. It is decremented and expired by the deadman() cyclic. |
|
3673 |
* When vfs_syncall() decides it is done, we disable the deadman() counter by |
|
3674 |
* setting sync_timeleft to zero. This timer guards against vfs_syncall() |
|
3675 |
* deadlocking or hanging inside of a broken filesystem or driver routine. |
|
3676 |
* |
|
3677 |
* The sync_triesleft counter is updated by vfs_syncall() itself. If we make |
|
3678 |
* sync_retries consecutive calls to bio_busy() and page_busy() without |
|
3679 |
* decreasing either the number of dirty buffers or dirty pages below the |
|
3680 |
* lowest count we have seen so far, we give up and return from vfs_syncall(). |
|
3681 |
* |
|
3682 |
* Each loop iteration ends with a call to delay() one second to allow time for |
|
3683 |
* i/o completion and to permit the user time to read our progress messages. |
|
3684 |
*/ |
|
3685 |
void |
|
3686 |
vfs_syncall(void) |
|
3687 |
{ |
|
3688 |
if (rootdir == NULL && !modrootloaded) |
|
3689 |
return; /* panic during boot - no filesystems yet */ |
|
3690 |
||
3691 |
printf("syncing file systems..."); |
|
3692 |
vfs_syncprogress(); |
|
3693 |
sync(); |
|
3694 |
||
3695 |
vfs_syncprogress(); |
|
3696 |
sync_triesleft = sync_retries; |
|
3697 |
||
3698 |
old_bufcnt = new_bufcnt = INT_MAX; |
|
3699 |
old_pgcnt = new_pgcnt = ULONG_MAX; |
|
3700 |
||
3701 |
while (sync_triesleft > 0) { |
|
3702 |
old_bufcnt = MIN(old_bufcnt, new_bufcnt); |
|
3703 |
old_pgcnt = MIN(old_pgcnt, new_pgcnt); |
|
3704 |
||
3705 |
new_bufcnt = bio_busy(B_TRUE); |
|
3706 |
new_pgcnt = page_busy(B_TRUE); |
|
3707 |
vfs_syncprogress(); |
|
3708 |
||
3709 |
if (new_bufcnt == 0 && new_pgcnt == 0) |
|
3710 |
break; |
|
3711 |
||
3712 |
if (new_bufcnt < old_bufcnt || new_pgcnt < old_pgcnt) |
|
3713 |
sync_triesleft = sync_retries; |
|
3714 |
else |
|
3715 |
sync_triesleft--; |
|
3716 |
||
3717 |
if (new_bufcnt) |
|
3718 |
printf(" [%d]", new_bufcnt); |
|
3719 |
if (new_pgcnt) |
|
3720 |
printf(" %lu", new_pgcnt); |
|
3721 |
||
3722 |
delay(hz); |
|
3723 |
} |
|
3724 |
||
3725 |
if (new_bufcnt != 0 || new_pgcnt != 0) |
|
3726 |
printf(" done (not all i/o completed)\n"); |
|
3727 |
else |
|
3728 |
printf(" done\n"); |
|
3729 |
||
3730 |
sync_timeleft = 0; |
|
3731 |
delay(hz); |
|
3732 |
} |
|
3733 |
||
3734 |
/* |
|
3735 |
* If we are in the middle of the sync phase of panic, reset sync_timeleft to |
|
3736 |
* sync_timeout to indicate that we are making progress and the deadman() |
|
3737 |
* omnipresent cyclic should not yet time us out. Note that it is safe to |
|
3738 |
* store to sync_timeleft here since the deadman() is firing at high-level |
|
3739 |
* on top of us. If we are racing with the deadman(), either the deadman() |
|
3740 |
* will decrement the old value and then we will reset it, or we will |
|
3741 |
* reset it and then the deadman() will immediately decrement it. In either |
|
3742 |
* case, correct behavior results. |
|
3743 |
*/ |
|
3744 |
void |
|
3745 |
vfs_syncprogress(void) |
|
3746 |
{ |
|
3747 |
if (panicstr) |
|
3748 |
sync_timeleft = sync_timeout; |
|
3749 |
} |
|
3750 |
||
3751 |
/* |
|
3752 |
* Map VFS flags to statvfs flags. These shouldn't really be separate |
|
3753 |
* flags at all. |
|
3754 |
*/ |
|
3755 |
uint_t |
|
3756 |
vf_to_stf(uint_t vf) |
|
3757 |
{ |
|
3758 |
uint_t stf = 0; |
|
3759 |
||
3760 |
if (vf & VFS_RDONLY) |
|
3761 |
stf |= ST_RDONLY; |
|
3762 |
if (vf & VFS_NOSETUID) |
|
3763 |
stf |= ST_NOSUID; |
|
3764 |
if (vf & VFS_NOTRUNC) |
|
3765 |
stf |= ST_NOTRUNC; |
|
3766 |
||
3767 |
return (stf); |
|
3768 |
} |
|
3769 |
||
3770 |
/*
 * vfsstray() is deliberately declared with an old-style (unprototyped)
 * declaration so that it can be used anywhere in the vfsops structure.
 */
int vfsstray();
|
3775 |
||
3776 |
/* |
|
3777 |
* Entries for (illegal) fstype 0. |
|
3778 |
*/ |
|
3779 |
/* ARGSUSED */ |
|
3780 |
int |
|
3781 |
vfsstray_sync(struct vfs *vfsp, short arg, struct cred *cr) |
|
3782 |
{ |
|
3783 |
cmn_err(CE_PANIC, "stray vfs operation"); |
|
3784 |
return (0); |
|
3785 |
} |
|
3786 |
||
3787 |
vfsops_t vfs_strayops = { |
|
3788 |
vfsstray, |
|
3789 |
vfsstray, |
|
3790 |
vfsstray, |
|
3791 |
vfsstray, |
|
3792 |
vfsstray_sync, |
|
3793 |
vfsstray, |
|
3794 |
vfsstray, |
|
3795 |
vfsstray |
|
3796 |
}; |
|
3797 |
||
3798 |
/* |
|
3799 |
* Entries for (illegal) fstype 0. |
|
3800 |
*/ |
|
3801 |
int |
|
3802 |
vfsstray(void) |
|
3803 |
{ |
|
3804 |
cmn_err(CE_PANIC, "stray vfs operation"); |
|
3805 |
return (0); |
|
3806 |
} |
|
3807 |
||
3808 |
/*
 * Operation that unconditionally fails with EIO.
 *
 * Exists to support forced UFS unmount and its interaction with LOFS,
 * but could be used by any file system.  See bug 1203132.
 */
int
vfs_EIO(void)
{
	int err = EIO;

	return (err);
}
|
3818 |
||
3819 |
/*
 * Sync flavour of vfs_EIO(): ignores its arguments and fails with EIO.
 *
 * The sync op has to be defined separately because the compiler gets
 * confused, and emits a warning, when ANSI and old-style prototypes are
 * mixed and a "short" argument is present.
 */
/*ARGSUSED*/
int
vfs_EIO_sync(struct vfs *vfsp, short arg, struct cred *cr)
{
	int err = EIO;

	return (err);
}
|
3830 |
||
3831 |
vfs_t EIO_vfs; |
|
3832 |
vfsops_t *EIO_vfsops; |
|
3833 |
||
3834 |
/* |
|
3835 |
* Called from startup() to initialize all loaded vfs's |
|
3836 |
*/ |
|
3837 |
void |
|
3838 |
vfsinit(void) |
|
3839 |
{ |
|
3840 |
struct vfssw *vswp; |
|
3841 |
int error; |
|
1520 | 3842 |
extern int vopstats_enabled; |
1488 | 3843 |
extern void vopstats_startup(); |
0 | 3844 |
|
3845 |
static const fs_operation_def_t EIO_vfsops_template[] = { |
|
3846 |
VFSNAME_MOUNT, vfs_EIO, |
|
3847 |
VFSNAME_UNMOUNT, vfs_EIO, |
|
3848 |
VFSNAME_ROOT, vfs_EIO, |
|
3849 |
VFSNAME_STATVFS, vfs_EIO, |
|
3850 |
VFSNAME_SYNC, (fs_generic_func_p) vfs_EIO_sync, |
|
3851 |
VFSNAME_VGET, vfs_EIO, |
|
3852 |
VFSNAME_MOUNTROOT, vfs_EIO, |
|
3853 |
VFSNAME_FREEVFS, vfs_EIO, |
|
3854 |
VFSNAME_VNSTATE, vfs_EIO, |
|
3855 |
NULL, NULL |
|
3856 |
}; |
|
3857 |
||
3858 |
||
3859 |
/* Initialize the vnode cache (file systems may use it during init). */ |
|
3860 |
||
3861 |
vn_create_cache(); |
|
3862 |
||
3863 |
/* Setup event monitor framework */ |
|
3864 |
||
3865 |
fem_init(); |
|
3866 |
||
3867 |
/* Initialize the dummy stray file system type. */ |
|
3868 |
||
3869 |
vfssw[0].vsw_vfsops = vfs_strayops; |
|
3870 |
||
3871 |
/* Initialize the dummy EIO file system. */ |
|
3872 |
error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops); |
|
3873 |
if (error != 0) { |
|
3874 |
cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template"); |
|
3875 |
/* Shouldn't happen, but not bad enough to panic */ |
|
3876 |
} |
|
3877 |
||
3878 |
VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL); |
|
3879 |
||
3880 |
/* |
|
3881 |
* Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup |
|
3882 |
* on this vfs can immediately notice it's invalid. |
|
3883 |
*/ |
|
3884 |
EIO_vfs.vfs_flag |= VFS_UNMOUNTED; |
|
3885 |
||
3886 |
/* |
|
3887 |
* Call the init routines of non-loadable filesystems only. |
|
3888 |
* Filesystems which are loaded as separate modules will be |
|
3889 |
* initialized by the module loading code instead. |
|
3890 |
*/ |
|
3891 |
||
3892 |
for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { |
|
3893 |
RLOCK_VFSSW(); |
|
3894 |
if (vswp->vsw_init != NULL) |
|
3895 |
(*vswp->vsw_init)(vswp - vfssw, vswp->vsw_name); |
|
3896 |
RUNLOCK_VFSSW(); |
|
3897 |
} |
|
1488 | 3898 |
|
3899 |
vopstats_startup(); |
|
1520 | 3900 |
|
3901 |
if (vopstats_enabled) { |
|
3902 |
/* EIO_vfs can collect stats, but we don't retrieve them */ |
|
3903 |
initialize_vopstats(&EIO_vfs.vfs_vopstats); |
|
3904 |
EIO_vfs.vfs_fstypevsp = NULL; |
|
3905 |
EIO_vfs.vfs_vskap = NULL; |
|
3906 |
EIO_vfs.vfs_flag |= VFS_STATS; |
|
3907 |
} |
|
0 | 3908 |
} |
3909 |
||
3910 |
/* |
|
3911 |
* Increments the vfs reference count by one atomically. |
|
3912 |
*/ |
|
3913 |
void |
|
3914 |
vfs_hold(vfs_t *vfsp) |
|
3915 |
{ |
|
3916 |
atomic_add_32(&vfsp->vfs_count, 1); |
|
3917 |
ASSERT(vfsp->vfs_count != 0); |
|
3918 |
} |
|
3919 |
||
3920 |
/* |
|
3921 |
* Decrements the vfs reference count by one atomically. When |
|
3922 |
* vfs reference count becomes zero, it calls the file system |
|
3923 |
* specific vfs_freevfs() to free up the resources. |
|
3924 |
*/ |
|
3925 |
void |
|
3926 |
vfs_rele(vfs_t *vfsp) |
|
3927 |
{ |
|
3928 |
ASSERT(vfsp->vfs_count != 0); |
|
3929 |
if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) { |
|
3930 |
VFS_FREEVFS(vfsp); |
|
3931 |
if (vfsp->vfs_zone) |
|
3932 |
zone_rele(vfsp->vfs_zone); |
|
3933 |
vfs_freemnttab(vfsp); |
|
1925 | 3934 |
if (vfsp->vfs_implp) |
3935 |
vfsimpl_teardown(vfsp); |
|
0 | 3936 |
sema_destroy(&vfsp->vfs_reflock); |
3937 |
kmem_free(vfsp, sizeof (*vfsp)); |
|
3938 |
} |
|
3939 |
} |
|
3940 |
||
3941 |
/* |
|
3942 |
* Generic operations vector support. |
|
3943 |
* |
|
3944 |
* This is used to build operations vectors for both the vfs and vnode. |
|
3945 |
* It's normally called only when a file system is loaded. |
|
3946 |
* |
|
3947 |
* There are many possible algorithms for this, including the following: |
|
3948 |
* |
|
3949 |
* (1) scan the list of known operations; for each, see if the file system |
|
3950 |
* includes an entry for it, and fill it in as appropriate. |
|
3951 |
* |
|
3952 |
* (2) set up defaults for all known operations. scan the list of ops |
|
3953 |
* supplied by the file system; for each which is both supplied and |
|
3954 |
* known, fill it in. |
|
3955 |
* |
|
3956 |
* (3) sort the lists of known ops & supplied ops; scan the list, filling |
|
3957 |
* in entries as we go. |
|
3958 |
* |
|
3959 |
* we choose (1) for simplicity, and because performance isn't critical here. |
|
3960 |
* note that (2) could be sped up using a precomputed hash table on known ops. |
|
3961 |
* (3) could be faster than either, but only if the lists were very large or |
|
3962 |
* supplied in sorted order. |
|
3963 |
* |
|
3964 |
*/ |
|
3965 |
||
3966 |
int |
|
3967 |
fs_build_vector(void *vector, int *unused_ops, |
|
3968 |
const fs_operation_trans_def_t *translation, |
|
3969 |
const fs_operation_def_t *operations) |
|
3970 |
{ |
|
3971 |
int i, num_trans, num_ops, used; |
|
3972 |
||
3973 |
/* Count the number of translations and the number of supplied */ |
|
3974 |
/* operations. */ |
|
3975 |
||
3976 |
{ |
|
3977 |
const fs_operation_trans_def_t *p; |
|
3978 |
||
3979 |
for (num_trans = 0, p = translation; |
|
3980 |
p->name != NULL; |
|
3981 |
num_trans++, p++) |
|
3982 |
; |
|
3983 |
} |
|
3984 |
||
3985 |
{ |
|
3986 |
const fs_operation_def_t *p; |
|
3987 |
||
3988 |
for (num_ops = 0, p = operations; |
|
3989 |
p->name != NULL; |
|
3990 |
num_ops++, p++) |
|
3991 |
; |
|
3992 |
} |
|
3993 |
||
3994 |
/* Walk through each operation known to our caller. There will be */ |
|
3995 |
/* one entry in the supplied "translation table" for each. */ |
|
3996 |
||
3997 |
used = 0; |
|
3998 |
||
3999 |
for (i = 0; i < num_trans; i++) { |
|
4000 |
int j, found; |
|
4001 |
char *curname; |
|
4002 |
fs_generic_func_p result; |
|
4003 |
fs_generic_func_p *location; |
|
4004 |
||
4005 |
curname = translation[i].name; |
|
4006 |
||
4007 |
/* Look for a matching operation in the list supplied by the */ |
|
4008 |
/* file system. */ |
|
4009 |
||
4010 |
found = 0; |
|
4011 |
||
4012 |
for (j = 0; j < num_ops; j++) { |
|
4013 |
if (strcmp(operations[j].name, curname) == 0) { |
|
4014 |
used++; |
|
4015 |
found = 1; |
|
4016 |
break; |
|
4017 |
} |
|
4018 |
} |
|
4019 |
||
4020 |
/* If the file system is using a "placeholder" for default */ |
|
4021 |
/* or error functions, grab the appropriate function out of */ |
|
4022 |
/* the translation table. If the file system didn't supply */ |
|
4023 |
/* this operation at all, use the default function. */ |
|
4024 |
||
4025 |
if (found) { |
|
4026 |
result = operations[j].func; |
|
4027 |
if (result == fs_default) { |
|
4028 |
result = translation[i].defaultFunc; |
|
4029 |
} else if (result == fs_error) { |
|
4030 |
result = translation[i].errorFunc; |
|
4031 |
} else if (result == NULL) { |
|
4032 |
/* Null values are PROHIBITED */ |
|
4033 |
return (EINVAL); |
|
4034 |
} |
|
4035 |
} else { |
|
4036 |
result = translation[i].defaultFunc; |
|
4037 |
} |
|
4038 |
||
4039 |
/* Now store the function into the operations vector. */ |
|
4040 |
||
4041 |
location = (fs_generic_func_p *) |
|
4042 |
(((char *)vector) + translation[i].offset); |
|
4043 |
||
4044 |
*location = result; |
|
4045 |
} |
|
4046 |
||
4047 |
*unused_ops = num_ops - used; |
|
4048 |
||
4049 |
return (0); |
|
4050 |
} |
|
4051 |
||
4052 |
/* Placeholder functions, should never be called. */ |
|
4053 |
||
4054 |
int |
|
4055 |
fs_error(void) |
|
4056 |
{ |
|
4057 |
cmn_err(CE_PANIC, "fs_error called"); |
|
4058 |
return (0); |
|
4059 |
} |
|
4060 |
||
4061 |
int |
|
4062 |
fs_default(void) |
|
4063 |
{ |
|
4064 |
cmn_err(CE_PANIC, "fs_default called"); |
|
4065 |
return (0); |
|
4066 |
} |
|
4067 |
||
4068 |
#ifdef __sparc |
|
4069 |
||
4070 |
/* |
|
4071 |
* Part of the implementation of booting off a mirrored root |
|
4072 |
* involves a change of dev_t for the root device. To |
|
4073 |
* accomplish this, first remove the existing hash table |
|
4074 |
* entry for the root device, convert to the new dev_t, |
|
4075 |
* then re-insert in the hash table at the head of the list. |
|
4076 |
*/ |
|
4077 |
void |
|
4078 |
vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype) |
|
4079 |
{ |
|
4080 |
vfs_list_lock(); |
|
4081 |
||
4082 |
vfs_hash_remove(vfsp); |
|
4083 |
||
4084 |
vfsp->vfs_dev = ndev; |
|
4085 |
vfs_make_fsid(&vfsp->vfs_fsid, ndev, fstype); |
|
4086 |
||
4087 |
vfs_hash_add(vfsp, 1); |
|
4088 |
||
4089 |
vfs_list_unlock(); |
|
4090 |
} |
|
4091 |
||
4092 |
#else /* x86 NEWBOOT */ |
|
4093 |
||
4094 |
int |
|
4095 |
rootconf() |
|
4096 |
{ |
|
4097 |
int error; |
|
4098 |
struct vfssw *vsw; |
|
4099 |
extern void pm_init(); |
|
4100 |
char *fstyp; |
|
4101 |
||
4102 |
fstyp = getrootfs(); |
|
4103 |
||
4104 |
if (error = clboot_rootconf()) |
|
4105 |
return (error); |
|
4106 |
||
4107 |
if (modload("fs", fstyp) == -1) |
|
4108 |
cmn_err(CE_PANIC, "Cannot _init %s module\n", fstyp); |
|
4109 |
||
4110 |
RLOCK_VFSSW(); |
|
4111 |
vsw = vfs_getvfsswbyname(fstyp); |
|
4112 |
RUNLOCK_VFSSW(); |
|
4113 |
VFS_INIT(rootvfs, &vsw->vsw_vfsops, 0); |
|
4114 |
VFS_HOLD(rootvfs); |
|
4115 |
||
4116 |
/* always mount readonly first */ |
|
4117 |
rootvfs->vfs_flag |= VFS_RDONLY; |
|
4118 |
||
4119 |
pm_init(); |
|
4120 |
||
4121 |
if (netboot) |
|
4122 |
(void) strplumb(); |
|
4123 |
||
4124 |
error = VFS_MOUNTROOT(rootvfs, ROOT_INIT); |
|
4125 |
vfs_unrefvfssw(vsw); |
|
4126 |
rootdev = rootvfs->vfs_dev; |
|
4127 |
||
4128 |
if (error) |
|
4129 |
cmn_err(CE_PANIC, "cannot mount root path %s", svm_bootpath); |
|
4130 |
return (error); |
|
4131 |
} |
|
4132 |
||
4133 |
/* |
|
4134 |
* XXX this is called by nfs only and should probably be removed |
|
4135 |
* If booted with ASKNAME, prompt on the console for a filesystem |
|
4136 |
* name and return it. |
|
4137 |
*/ |
|
4138 |
void |
|
4139 |
getfsname(char *askfor, char *name, size_t namelen) |
|
4140 |
{ |
|
4141 |
if (boothowto & RB_ASKNAME) { |
|
4142 |
printf("%s name: ", askfor); |
|
4143 |
console_gets(name, namelen); |
|
4144 |
} |
|
4145 |
} |
|
4146 |
||
4147 |
/* |
|
4148 |
* If server_path exists, then we are booting a diskless |
|
4149 |
* client. Otherwise, we default to ufs. Zfs should perhaps be |
|
4150 |
* another property. |
|
4151 |
*/ |
|
4152 |
static char * |
|
4153 |
getrootfs(void) |
|
4154 |
{ |
|
4155 |
extern char *strplumb_get_netdev_path(void); |
|
4156 |
char *propstr = NULL; |
|
4157 |
||
4158 |
/* check fstype property; it should be nfsdyn for diskless */ |
|
4159 |
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), |
|
4160 |
DDI_PROP_DONTPASS, "fstype", &propstr) |
|
4161 |
== DDI_SUCCESS) { |
|
4162 |
(void) strncpy(rootfs.bo_fstype, propstr, BO_MAXFSNAME); |
|
4163 |
ddi_prop_free(propstr); |
|
4164 |
} |
|
4165 |
||
4166 |
if (strncmp(rootfs.bo_fstype, "nfs", 3) != 0) |
|
4167 |
return (rootfs.bo_fstype); |
|
4168 |
||
4169 |
++netboot; |
|
4170 |
/* check if path to network interface is specified in bootpath */ |
|
4171 |
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), |
|
4172 |
DDI_PROP_DONTPASS, "bootpath", &propstr) |
|
4173 |
== DDI_SUCCESS) { |
|
4174 |
(void) strncpy(rootfs.bo_name, propstr, BO_MAXOBJNAME); |
|
4175 |
ddi_prop_free(propstr); |
|
4176 |
} else { |
|
4177 |
/* attempt to determine netdev_path via boot_mac address */ |
|
4178 |
netdev_path = strplumb_get_netdev_path(); |
|
4179 |
if (netdev_path == NULL) |
|
4180 |
cmn_err(CE_PANIC, |
|
4181 |
"Cannot find boot network interface\n"); |
|
4182 |
(void) strncpy(rootfs.bo_name, netdev_path, BO_MAXOBJNAME); |
|
4183 |
} |
|
4184 |
return ("nfs"); |
|
4185 |
} |
|
4186 |
#endif |