author | eschrock |
Tue, 05 Sep 2006 11:37:36 -0700 | |
changeset 2676 | 5cee47eddab6 |
parent 2597 | 21c0f93f2513 |
child 3280 | e93ccc27c51d |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1231
64215f768e86
6354804 The file's ACL was changed when cp it from one ZFS file system to another one.
marks
parents:
885
diff
changeset
|
22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/types.h> |
|
29 |
#include <sys/param.h> |
|
30 |
#include <sys/time.h> |
|
31 |
#include <sys/systm.h> |
|
32 |
#include <sys/sysmacros.h> |
|
33 |
#include <sys/resource.h> |
|
34 |
#include <sys/vfs.h> |
|
35 |
#include <sys/vnode.h> |
|
36 |
#include <sys/file.h> |
|
37 |
#include <sys/mode.h> |
|
38 |
#include <sys/kmem.h> |
|
39 |
#include <sys/uio.h> |
|
40 |
#include <sys/pathname.h> |
|
41 |
#include <sys/cmn_err.h> |
|
42 |
#include <sys/errno.h> |
|
43 |
#include <sys/stat.h> |
|
44 |
#include <sys/unistd.h> |
|
45 |
#include <sys/random.h> |
|
46 |
#include <sys/policy.h> |
|
47 |
#include <sys/zfs_dir.h> |
|
48 |
#include <sys/zfs_acl.h> |
|
49 |
#include <sys/fs/zfs.h> |
|
50 |
#include "fs/fs_subr.h" |
|
51 |
#include <sys/zap.h> |
|
52 |
#include <sys/dmu.h> |
|
53 |
#include <sys/atomic.h> |
|
54 |
#include <sys/zfs_ctldir.h> |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
55 |
#include <sys/dnlc.h> |
789 | 56 |
|
57 |
/* |
|
58 |
* Lock a directory entry. A dirlock on <dzp, name> protects that name |
|
59 |
* in dzp's directory zap object. As long as you hold a dirlock, you can |
|
60 |
* assume two things: (1) dzp cannot be reaped, and (2) no other thread |
|
61 |
* can change the zap entry for (i.e. link or unlink) this name. |
|
62 |
* |
|
63 |
* Input arguments: |
|
64 |
* dzp - znode for directory |
|
65 |
* name - name of entry to lock |
|
66 |
* flag - ZNEW: if the entry already exists, fail with EEXIST. |
|
67 |
* ZEXISTS: if the entry does not exist, fail with ENOENT. |
|
68 |
* ZSHARED: allow concurrent access with other ZSHARED callers. |
|
69 |
* ZXATTR: we want dzp's xattr directory |
|
70 |
* |
|
71 |
* Output arguments: |
|
72 |
* zpp - pointer to the znode for the entry (NULL if there isn't one) |
|
73 |
* dlpp - pointer to the dirlock for this entry (NULL on error) |
|
74 |
* |
|
75 |
* Return value: 0 on success or errno on failure. |
|
76 |
* |
|
77 |
* NOTE: Always checks for, and rejects, '.' and '..'. |
|
78 |
*/ |
|
79 |
int |
|
80 |
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, |
|
81 |
int flag) |
|
82 |
{ |
|
83 |
zfsvfs_t *zfsvfs = dzp->z_zfsvfs; |
|
84 |
zfs_dirlock_t *dl; |
|
85 |
uint64_t zoid; |
|
86 |
int error; |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
87 |
vnode_t *vp; |
789 | 88 |
|
89 |
*zpp = NULL; |
|
90 |
*dlpp = NULL; |
|
91 |
||
92 |
/* |
|
93 |
* Verify that we are not trying to lock '.', '..', or '.zfs' |
|
94 |
*/ |
|
95 |
if (name[0] == '.' && |
|
96 |
(name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || |
|
97 |
zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) |
|
98 |
return (EEXIST); |
|
99 |
||
100 |
/* |
|
101 |
* Wait until there are no locks on this name. |
|
102 |
*/ |
|
103 |
mutex_enter(&dzp->z_lock); |
|
104 |
for (;;) { |
|
105 |
if (dzp->z_reap) { |
|
106 |
mutex_exit(&dzp->z_lock); |
|
107 |
return (ENOENT); |
|
108 |
} |
|
109 |
for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) |
|
110 |
if (strcmp(name, dl->dl_name) == 0) |
|
111 |
break; |
|
112 |
if (dl == NULL) { |
|
113 |
/* |
|
114 |
* Allocate a new dirlock and add it to the list. |
|
115 |
*/ |
|
116 |
dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); |
|
117 |
cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); |
|
118 |
dl->dl_name = name; |
|
119 |
dl->dl_sharecnt = 0; |
|
120 |
dl->dl_namesize = 0; |
|
121 |
dl->dl_dzp = dzp; |
|
122 |
dl->dl_next = dzp->z_dirlocks; |
|
123 |
dzp->z_dirlocks = dl; |
|
124 |
break; |
|
125 |
} |
|
126 |
if ((flag & ZSHARED) && dl->dl_sharecnt != 0) |
|
127 |
break; |
|
128 |
cv_wait(&dl->dl_cv, &dzp->z_lock); |
|
129 |
} |
|
130 |
||
131 |
if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { |
|
132 |
/* |
|
133 |
* We're the second shared reference to dl. Make a copy of |
|
134 |
* dl_name in case the first thread goes away before we do. |
|
135 |
* Note that we initialize the new name before storing its |
|
136 |
* pointer into dl_name, because the first thread may load |
|
137 |
* dl->dl_name at any time. He'll either see the old value, |
|
138 |
* which is his, or the new shared copy; either is OK. |
|
139 |
*/ |
|
140 |
dl->dl_namesize = strlen(dl->dl_name) + 1; |
|
141 |
name = kmem_alloc(dl->dl_namesize, KM_SLEEP); |
|
142 |
bcopy(dl->dl_name, name, dl->dl_namesize); |
|
143 |
dl->dl_name = name; |
|
144 |
} |
|
145 |
||
146 |
mutex_exit(&dzp->z_lock); |
|
147 |
||
148 |
/* |
|
149 |
* We have a dirlock on the name. (Note that it is the dirlock, |
|
150 |
* not the dzp's z_lock, that protects the name in the zap object.) |
|
151 |
* See if there's an object by this name; if so, put a hold on it. |
|
152 |
*/ |
|
153 |
if (flag & ZXATTR) { |
|
154 |
zoid = dzp->z_phys->zp_xattr; |
|
155 |
error = (zoid == 0 ? ENOENT : 0); |
|
156 |
} else { |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
157 |
vp = dnlc_lookup(ZTOV(dzp), name); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
158 |
if (vp == DNLC_NO_VNODE) { |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
159 |
VN_RELE(vp); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
160 |
error = ENOENT; |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
161 |
} else if (vp) { |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
162 |
if (flag & ZNEW) { |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
163 |
zfs_dirent_unlock(dl); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
164 |
VN_RELE(vp); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
165 |
return (EEXIST); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
166 |
} |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
167 |
*dlpp = dl; |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
168 |
*zpp = VTOZ(vp); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
169 |
return (0); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
170 |
} else { |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
171 |
error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
172 |
8, 1, &zoid); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
173 |
if (error == ENOENT) |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
174 |
dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
175 |
} |
789 | 176 |
} |
177 |
if (error) { |
|
178 |
if (error != ENOENT || (flag & ZEXISTS)) { |
|
179 |
zfs_dirent_unlock(dl); |
|
180 |
return (error); |
|
181 |
} |
|
182 |
} else { |
|
183 |
if (flag & ZNEW) { |
|
184 |
zfs_dirent_unlock(dl); |
|
185 |
return (EEXIST); |
|
186 |
} |
|
187 |
error = zfs_zget(zfsvfs, zoid, zpp); |
|
188 |
if (error) { |
|
189 |
zfs_dirent_unlock(dl); |
|
190 |
return (error); |
|
191 |
} |
|
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
192 |
if (!(flag & ZXATTR)) |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
193 |
dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); |
789 | 194 |
} |
195 |
||
196 |
*dlpp = dl; |
|
197 |
||
198 |
return (0); |
|
199 |
} |
|
200 |
||
201 |
/* |
|
202 |
* Unlock this directory entry and wake anyone who was waiting for it. |
|
203 |
*/ |
|
204 |
void |
|
205 |
zfs_dirent_unlock(zfs_dirlock_t *dl) |
|
206 |
{ |
|
207 |
znode_t *dzp = dl->dl_dzp; |
|
208 |
zfs_dirlock_t **prev_dl, *cur_dl; |
|
209 |
||
210 |
mutex_enter(&dzp->z_lock); |
|
211 |
if (dl->dl_sharecnt > 1) { |
|
212 |
dl->dl_sharecnt--; |
|
213 |
mutex_exit(&dzp->z_lock); |
|
214 |
return; |
|
215 |
} |
|
216 |
prev_dl = &dzp->z_dirlocks; |
|
217 |
while ((cur_dl = *prev_dl) != dl) |
|
218 |
prev_dl = &cur_dl->dl_next; |
|
219 |
*prev_dl = dl->dl_next; |
|
220 |
cv_broadcast(&dl->dl_cv); |
|
221 |
mutex_exit(&dzp->z_lock); |
|
222 |
||
223 |
if (dl->dl_namesize != 0) |
|
224 |
kmem_free(dl->dl_name, dl->dl_namesize); |
|
225 |
cv_destroy(&dl->dl_cv); |
|
226 |
kmem_free(dl, sizeof (*dl)); |
|
227 |
} |
|
228 |
||
229 |
/* |
|
230 |
* Look up an entry in a directory. |
|
231 |
* |
|
232 |
* NOTE: '.' and '..' are handled as special cases because |
|
233 |
* no directory entries are actually stored for them. If this is |
|
234 |
* the root of a filesystem, then '.zfs' is also treated as a |
|
235 |
* special pseudo-directory. |
|
236 |
*/ |
|
237 |
int |
|
238 |
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp) |
|
239 |
{ |
|
240 |
zfs_dirlock_t *dl; |
|
241 |
znode_t *zp; |
|
242 |
int error = 0; |
|
243 |
||
244 |
if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { |
|
245 |
*vpp = ZTOV(dzp); |
|
246 |
VN_HOLD(*vpp); |
|
247 |
} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { |
|
248 |
zfsvfs_t *zfsvfs = dzp->z_zfsvfs; |
|
249 |
/* |
|
250 |
* If we are a snapshot mounted under .zfs, return |
|
251 |
* the vp for the snapshot directory. |
|
252 |
*/ |
|
1878
c22df0f5603f
6413573 deadlock between fsflush() and zfs_create()
maybee
parents:
1544
diff
changeset
|
253 |
if (dzp->z_phys->zp_parent == dzp->z_id && |
c22df0f5603f
6413573 deadlock between fsflush() and zfs_create()
maybee
parents:
1544
diff
changeset
|
254 |
zfsvfs->z_parent != zfsvfs) { |
789 | 255 |
error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, |
256 |
"snapshot", vpp, NULL, 0, NULL, kcred); |
|
257 |
return (error); |
|
258 |
} |
|
259 |
rw_enter(&dzp->z_parent_lock, RW_READER); |
|
260 |
error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp); |
|
261 |
if (error == 0) |
|
262 |
*vpp = ZTOV(zp); |
|
263 |
rw_exit(&dzp->z_parent_lock); |
|
264 |
} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { |
|
265 |
*vpp = zfsctl_root(dzp); |
|
266 |
} else { |
|
267 |
error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS | ZSHARED); |
|
268 |
if (error == 0) { |
|
269 |
*vpp = ZTOV(zp); |
|
270 |
zfs_dirent_unlock(dl); |
|
869
dc133b87dfb3
6297285 znode prefetching in zfs_readdir causes 5x performance degradation for 'ls'
perrin
parents:
789
diff
changeset
|
271 |
dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ |
789 | 272 |
} |
273 |
} |
|
274 |
||
275 |
return (error); |
|
276 |
} |
|
277 |
||
278 |
/*
 * Render x as a lowercase hexadecimal string with no leading zeros
 * ("0" for zero).  The digits are written right-justified into the
 * caller's 17-byte buffer, ending with a terminating NUL in namebuf[16].
 * The return value points at the first digit, somewhere inside namebuf.
 */
static char *
zfs_dq_hexname(char namebuf[17], uint64_t x)
{
	char *cp = namebuf + 16;

	*cp = '\0';
	for (;;) {
		cp--;
		*cp = "0123456789abcdef"[x & 0xf];
		x >>= 4;
		if (x == 0)
			break;
	}

	return (cp);
}
|
292 |
||
1544 | 293 |
/* |
294 |
* Delete Queue Error Handling |
|
295 |
* |
|
296 |
* When dealing with the delete queue, we dmu_tx_hold_zap(), but we |
|
297 |
* don't specify the name of the entry that we will be manipulating. We |
|
298 |
* also fib and say that we won't be adding any new entries to the |
|
299 |
* delete queue, even though we might (this is to lower the minimum file |
|
300 |
* size that can be deleted in a full filesystem). So on the small |
|
301 |
* chance that the delete queue is using a fat zap (i.e. has more than |
|
302 |
* 2000 entries), we *may* not pre-read a block that's needed. |
|
303 |
* Therefore it is remotely possible for some of the assertions |
|
304 |
* regarding the delete queue below to fail due to i/o error. On a |
|
305 |
* nondebug system, this will result in the space being leaked. |
|
306 |
*/ |
|
307 |
||
789 | 308 |
void |
309 |
zfs_dq_add(znode_t *zp, dmu_tx_t *tx) |
|
310 |
{ |
|
311 |
zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
|
312 |
char obj_name[17]; |
|
313 |
int error; |
|
314 |
||
315 |
ASSERT(zp->z_reap); |
|
316 |
ASSERT3U(zp->z_phys->zp_links, ==, 0); |
|
317 |
||
318 |
error = zap_add(zfsvfs->z_os, zfsvfs->z_dqueue, |
|
319 |
zfs_dq_hexname(obj_name, zp->z_id), 8, 1, &zp->z_id, tx); |
|
320 |
ASSERT3U(error, ==, 0); |
|
321 |
} |
|
322 |
||
323 |
/* |
|
324 |
* Delete the entire contents of a directory. Return a count |
|
325 |
* of the number of entries that could not be deleted. |
|
326 |
* |
|
327 |
* NOTE: this function assumes that the directory is inactive, |
|
328 |
* so there is no need to lock its entries before deletion. |
|
329 |
* Also, it assumes the directory contents is *only* regular |
|
330 |
* files. |
|
331 |
*/ |
|
332 |
static int |
|
333 |
zfs_purgedir(znode_t *dzp) |
|
334 |
{ |
|
335 |
zap_cursor_t zc; |
|
336 |
zap_attribute_t zap; |
|
337 |
znode_t *xzp; |
|
338 |
dmu_tx_t *tx; |
|
339 |
zfsvfs_t *zfsvfs = dzp->z_zfsvfs; |
|
340 |
zfs_dirlock_t dl; |
|
341 |
int skipped = 0; |
|
342 |
int error; |
|
343 |
||
344 |
||
345 |
for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); |
|
346 |
(error = zap_cursor_retrieve(&zc, &zap)) == 0; |
|
347 |
zap_cursor_advance(&zc)) { |
|
348 |
error = zfs_zget(zfsvfs, zap.za_first_integer, &xzp); |
|
349 |
ASSERT3U(error, ==, 0); |
|
350 |
||
351 |
ASSERT((ZTOV(xzp)->v_type == VREG) || |
|
352 |
(ZTOV(xzp)->v_type == VLNK)); |
|
353 |
||
354 |
tx = dmu_tx_create(zfsvfs->z_os); |
|
355 |
dmu_tx_hold_bonus(tx, dzp->z_id); |
|
1544 | 356 |
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); |
789 | 357 |
dmu_tx_hold_bonus(tx, xzp->z_id); |
1544 | 358 |
dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL); |
789 | 359 |
error = dmu_tx_assign(tx, TXG_WAIT); |
360 |
if (error) { |
|
361 |
dmu_tx_abort(tx); |
|
362 |
VN_RELE(ZTOV(xzp)); |
|
363 |
skipped += 1; |
|
364 |
continue; |
|
365 |
} |
|
366 |
bzero(&dl, sizeof (dl)); |
|
367 |
dl.dl_dzp = dzp; |
|
368 |
dl.dl_name = zap.za_name; |
|
369 |
||
370 |
error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); |
|
371 |
ASSERT3U(error, ==, 0); |
|
372 |
dmu_tx_commit(tx); |
|
373 |
||
374 |
VN_RELE(ZTOV(xzp)); |
|
375 |
} |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
869
diff
changeset
|
376 |
zap_cursor_fini(&zc); |
789 | 377 |
ASSERT(error == ENOENT); |
378 |
return (skipped); |
|
379 |
} |
|
380 |
||
381 |
/* |
|
382 |
* Special function to requeue the znodes for deletion that were |
|
383 |
* in progress when we either crashed or umounted the file system. |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
384 |
* |
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
385 |
* returns 1 if queue was drained. |
789 | 386 |
*/ |
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
387 |
static int |
789 | 388 |
zfs_drain_dq(zfsvfs_t *zfsvfs) |
389 |
{ |
|
390 |
zap_cursor_t zc; |
|
391 |
zap_attribute_t zap; |
|
392 |
dmu_object_info_t doi; |
|
393 |
znode_t *zp; |
|
394 |
int error; |
|
395 |
||
396 |
/* |
|
397 |
* Interate over the contents of the delete queue. |
|
398 |
*/ |
|
399 |
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_dqueue); |
|
400 |
zap_cursor_retrieve(&zc, &zap) == 0; |
|
401 |
zap_cursor_advance(&zc)) { |
|
402 |
||
403 |
/* |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
404 |
* Create more threads if necessary to balance the load. |
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
405 |
* quit if the delete threads have been shut down. |
789 | 406 |
*/ |
407 |
if (zfs_delete_thread_target(zfsvfs, -1) != 0) |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
408 |
return (0); |
789 | 409 |
|
410 |
/* |
|
411 |
* See what kind of object we have in queue |
|
412 |
*/ |
|
413 |
||
414 |
error = dmu_object_info(zfsvfs->z_os, |
|
415 |
zap.za_first_integer, &doi); |
|
416 |
if (error != 0) |
|
417 |
continue; |
|
418 |
||
419 |
ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || |
|
420 |
(doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); |
|
421 |
/* |
|
422 |
* We need to re-mark these queue entries for reaping, |
|
423 |
* so we pull them back into core and set zp->z_reap. |
|
424 |
*/ |
|
425 |
error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); |
|
426 |
||
427 |
/* |
|
428 |
* We may pick up znodes that are already marked for reaping. |
|
429 |
* This could happen during the purge of an extended attribute |
|
430 |
* directory. All we need to do is skip over them, since they |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
431 |
* are already in the system to be processed by the delete |
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
432 |
* thread(s). |
789 | 433 |
*/ |
434 |
if (error != 0) { |
|
435 |
continue; |
|
436 |
} |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
437 |
|
789 | 438 |
zp->z_reap = 1; |
439 |
VN_RELE(ZTOV(zp)); |
|
440 |
} |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
869
diff
changeset
|
441 |
zap_cursor_fini(&zc); |
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
442 |
return (1); |
789 | 443 |
} |
444 |
||
445 |
void |
|
446 |
zfs_delete_thread(void *arg) |
|
447 |
{ |
|
448 |
zfsvfs_t *zfsvfs = arg; |
|
449 |
zfs_delete_t *zd = &zfsvfs->z_delete_head; |
|
450 |
znode_t *zp; |
|
451 |
callb_cpr_t cprinfo; |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
452 |
int drained; |
789 | 453 |
|
454 |
CALLB_CPR_INIT(&cprinfo, &zd->z_mutex, callb_generic_cpr, "zfs_delete"); |
|
455 |
||
456 |
mutex_enter(&zd->z_mutex); |
|
457 |
||
458 |
if (!zd->z_drained && !zd->z_draining) { |
|
459 |
zd->z_draining = B_TRUE; |
|
460 |
mutex_exit(&zd->z_mutex); |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
461 |
drained = zfs_drain_dq(zfsvfs); |
789 | 462 |
mutex_enter(&zd->z_mutex); |
463 |
zd->z_draining = B_FALSE; |
|
2245
0f2733e0b56b
6436526 delete_queue thread reporting drained when it may not be true
marks
parents:
2113
diff
changeset
|
464 |
zd->z_drained = drained; |
789 | 465 |
cv_broadcast(&zd->z_quiesce_cv); |
466 |
} |
|
467 |
||
468 |
while (zd->z_thread_count <= zd->z_thread_target) { |
|
469 |
zp = list_head(&zd->z_znodes); |
|
470 |
if (zp == NULL) { |
|
471 |
ASSERT(zd->z_znode_count == 0); |
|
472 |
CALLB_CPR_SAFE_BEGIN(&cprinfo); |
|
473 |
cv_wait(&zd->z_cv, &zd->z_mutex); |
|
474 |
CALLB_CPR_SAFE_END(&cprinfo, &zd->z_mutex); |
|
475 |
continue; |
|
476 |
} |
|
477 |
ASSERT(zd->z_znode_count != 0); |
|
478 |
list_remove(&zd->z_znodes, zp); |
|
479 |
if (--zd->z_znode_count == 0) |
|
480 |
cv_broadcast(&zd->z_quiesce_cv); |
|
481 |
mutex_exit(&zd->z_mutex); |
|
482 |
zfs_rmnode(zp); |
|
483 |
(void) zfs_delete_thread_target(zfsvfs, -1); |
|
484 |
mutex_enter(&zd->z_mutex); |
|
485 |
} |
|
486 |
||
487 |
ASSERT(zd->z_thread_count != 0); |
|
488 |
if (--zd->z_thread_count == 0) |
|
489 |
cv_broadcast(&zd->z_cv); |
|
490 |
||
491 |
CALLB_CPR_EXIT(&cprinfo); /* NB: drops z_mutex */ |
|
492 |
thread_exit(); |
|
493 |
} |
|
494 |
||
495 |
static int zfs_work_per_thread_shift = 11; /* 2048 (2^11) per thread */ |
|
496 |
||
497 |
/* |
|
498 |
* Set the target number of delete threads to 'nthreads'. |
|
499 |
* If nthreads == -1, choose a number based on current workload. |
|
500 |
* If nthreads == 0, don't return until the threads have exited. |
|
501 |
*/ |
|
502 |
int |
|
503 |
zfs_delete_thread_target(zfsvfs_t *zfsvfs, int nthreads) |
|
504 |
{ |
|
505 |
zfs_delete_t *zd = &zfsvfs->z_delete_head; |
|
506 |
||
507 |
mutex_enter(&zd->z_mutex); |
|
508 |
||
509 |
if (nthreads == -1) { |
|
510 |
if (zd->z_thread_target == 0) { |
|
511 |
mutex_exit(&zd->z_mutex); |
|
512 |
return (EBUSY); |
|
513 |
} |
|
514 |
nthreads = zd->z_znode_count >> zfs_work_per_thread_shift; |
|
515 |
nthreads = MIN(nthreads, ncpus << 1); |
|
516 |
nthreads = MAX(nthreads, 1); |
|
517 |
nthreads += !!zd->z_draining; |
|
518 |
} |
|
519 |
||
520 |
zd->z_thread_target = nthreads; |
|
521 |
||
522 |
while (zd->z_thread_count < zd->z_thread_target) { |
|
523 |
(void) thread_create(NULL, 0, zfs_delete_thread, zfsvfs, |
|
524 |
0, &p0, TS_RUN, minclsyspri); |
|
525 |
zd->z_thread_count++; |
|
526 |
} |
|
527 |
||
528 |
while (zd->z_thread_count > zd->z_thread_target && nthreads == 0) { |
|
529 |
cv_broadcast(&zd->z_cv); |
|
530 |
cv_wait(&zd->z_cv, &zd->z_mutex); |
|
531 |
} |
|
532 |
||
533 |
mutex_exit(&zd->z_mutex); |
|
534 |
||
535 |
return (0); |
|
536 |
} |
|
537 |
||
538 |
/* |
|
539 |
* Wait until everything that's been queued has been deleted. |
|
540 |
*/ |
|
541 |
void |
|
542 |
zfs_delete_wait_empty(zfsvfs_t *zfsvfs) |
|
543 |
{ |
|
544 |
zfs_delete_t *zd = &zfsvfs->z_delete_head; |
|
545 |
||
546 |
mutex_enter(&zd->z_mutex); |
|
547 |
ASSERT(zd->z_thread_target != 0); |
|
548 |
while (!zd->z_drained || zd->z_znode_count != 0) { |
|
549 |
ASSERT(zd->z_thread_target != 0); |
|
550 |
cv_wait(&zd->z_quiesce_cv, &zd->z_mutex); |
|
551 |
} |
|
552 |
mutex_exit(&zd->z_mutex); |
|
553 |
} |
|
554 |
||
555 |
void |
|
556 |
zfs_rmnode(znode_t *zp) |
|
557 |
{ |
|
558 |
zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
|
559 |
objset_t *os = zfsvfs->z_os; |
|
560 |
znode_t *xzp = NULL; |
|
561 |
char obj_name[17]; |
|
562 |
dmu_tx_t *tx; |
|
563 |
uint64_t acl_obj; |
|
564 |
int error; |
|
565 |
||
566 |
ASSERT(ZTOV(zp)->v_count == 0); |
|
567 |
ASSERT(zp->z_phys->zp_links == 0); |
|
568 |
||
569 |
/* |
|
570 |
* If this is an attribute directory, purge its contents. |
|
571 |
*/ |
|
572 |
if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) |
|
573 |
if (zfs_purgedir(zp) != 0) { |
|
574 |
zfs_delete_t *delq = &zfsvfs->z_delete_head; |
|
575 |
/* |
|
576 |
* Add this back to the delete list to be retried later. |
|
577 |
* |
|
578 |
* XXX - this could just busy loop on us... |
|
579 |
*/ |
|
580 |
mutex_enter(&delq->z_mutex); |
|
581 |
list_insert_tail(&delq->z_znodes, zp); |
|
582 |
delq->z_znode_count++; |
|
583 |
mutex_exit(&delq->z_mutex); |
|
584 |
return; |
|
585 |
} |
|
586 |
||
587 |
/* |
|
588 |
* If the file has extended attributes, unlink the xattr dir. |
|
589 |
*/ |
|
590 |
if (zp->z_phys->zp_xattr) { |
|
591 |
error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); |
|
592 |
ASSERT(error == 0); |
|
593 |
} |
|
594 |
||
595 |
acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; |
|
596 |
||
597 |
/* |
|
598 |
* Set up the transaction. |
|
599 |
*/ |
|
600 |
tx = dmu_tx_create(os); |
|
601 |
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); |
|
1544 | 602 |
dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL); |
789 | 603 |
if (xzp) { |
604 |
dmu_tx_hold_bonus(tx, xzp->z_id); |
|
1544 | 605 |
dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, TRUE, NULL); |
789 | 606 |
} |
607 |
if (acl_obj) |
|
608 |
dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); |
|
609 |
error = dmu_tx_assign(tx, TXG_WAIT); |
|
610 |
if (error) { |
|
611 |
zfs_delete_t *delq = &zfsvfs->z_delete_head; |
|
612 |
||
613 |
dmu_tx_abort(tx); |
|
614 |
/* |
|
615 |
* Add this back to the delete list to be retried later. |
|
616 |
* |
|
617 |
* XXX - this could just busy loop on us... |
|
618 |
*/ |
|
619 |
mutex_enter(&delq->z_mutex); |
|
620 |
list_insert_tail(&delq->z_znodes, zp); |
|
621 |
delq->z_znode_count++; |
|
622 |
mutex_exit(&delq->z_mutex); |
|
623 |
return; |
|
624 |
} |
|
625 |
||
626 |
if (xzp) { |
|
627 |
dmu_buf_will_dirty(xzp->z_dbuf, tx); |
|
628 |
mutex_enter(&xzp->z_lock); |
|
629 |
xzp->z_reap = 1; /* mark xzp for deletion */ |
|
630 |
xzp->z_phys->zp_links = 0; /* no more links to it */ |
|
631 |
mutex_exit(&xzp->z_lock); |
|
632 |
zfs_dq_add(xzp, tx); /* add xzp to delete queue */ |
|
633 |
} |
|
634 |
||
635 |
/* |
|
636 |
* Remove this znode from delete queue |
|
637 |
*/ |
|
638 |
error = zap_remove(os, zfsvfs->z_dqueue, |
|
639 |
zfs_dq_hexname(obj_name, zp->z_id), tx); |
|
640 |
ASSERT3U(error, ==, 0); |
|
641 |
||
642 |
zfs_znode_delete(zp, tx); |
|
643 |
||
644 |
dmu_tx_commit(tx); |
|
645 |
||
646 |
if (xzp) |
|
647 |
VN_RELE(ZTOV(xzp)); |
|
648 |
} |
|
649 |
||
650 |
/* |
|
651 |
* Link zp into dl. Can only fail if zp has been reaped. |
|
652 |
*/ |
|
653 |
int |
|
654 |
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) |
|
655 |
{ |
|
656 |
znode_t *dzp = dl->dl_dzp; |
|
657 |
vnode_t *vp = ZTOV(zp); |
|
658 |
int zp_is_dir = (vp->v_type == VDIR); |
|
659 |
int error; |
|
660 |
||
661 |
dmu_buf_will_dirty(zp->z_dbuf, tx); |
|
662 |
mutex_enter(&zp->z_lock); |
|
663 |
||
664 |
if (!(flag & ZRENAMING)) { |
|
665 |
if (zp->z_reap) { /* no new links to reaped zp */ |
|
666 |
ASSERT(!(flag & (ZNEW | ZEXISTS))); |
|
667 |
mutex_exit(&zp->z_lock); |
|
668 |
return (ENOENT); |
|
669 |
} |
|
670 |
zp->z_phys->zp_links++; |
|
671 |
} |
|
672 |
zp->z_phys->zp_parent = dzp->z_id; /* dzp is now zp's parent */ |
|
673 |
||
674 |
if (!(flag & ZNEW)) |
|
675 |
zfs_time_stamper_locked(zp, STATE_CHANGED, tx); |
|
676 |
mutex_exit(&zp->z_lock); |
|
677 |
||
678 |
dmu_buf_will_dirty(dzp->z_dbuf, tx); |
|
679 |
mutex_enter(&dzp->z_lock); |
|
680 |
dzp->z_phys->zp_size++; /* one dirent added */ |
|
681 |
dzp->z_phys->zp_links += zp_is_dir; /* ".." link from zp */ |
|
682 |
zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx); |
|
683 |
mutex_exit(&dzp->z_lock); |
|
684 |
||
685 |
error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, |
|
686 |
8, 1, &zp->z_id, tx); |
|
687 |
ASSERT(error == 0); |
|
688 |
||
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
689 |
dnlc_update(ZTOV(dzp), dl->dl_name, vp); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
690 |
|
789 | 691 |
return (0); |
692 |
} |
|
693 |
||
694 |
/* |
|
695 |
* Unlink zp from dl, and mark zp for reaping if this was the last link. |
|
696 |
* Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). |
|
697 |
* If 'reaped_ptr' is NULL, we put reaped znodes on the delete queue. |
|
698 |
* If it's non-NULL, we use it to indicate whether the znode needs reaping, |
|
699 |
* and it's the caller's job to do it. |
|
700 |
*/ |
|
701 |
int |
|
702 |
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, |
|
703 |
int *reaped_ptr) |
|
704 |
{ |
|
705 |
znode_t *dzp = dl->dl_dzp; |
|
706 |
vnode_t *vp = ZTOV(zp); |
|
707 |
int zp_is_dir = (vp->v_type == VDIR); |
|
708 |
int reaped = 0; |
|
709 |
int error; |
|
710 |
||
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
711 |
dnlc_remove(ZTOV(dzp), dl->dl_name); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1231
diff
changeset
|
712 |
|
789 | 713 |
if (!(flag & ZRENAMING)) { |
714 |
dmu_buf_will_dirty(zp->z_dbuf, tx); |
|
715 |
||
716 |
if (vn_vfswlock(vp)) /* prevent new mounts on zp */ |
|
717 |
return (EBUSY); |
|
718 |
||
719 |
if (vn_ismntpt(vp)) { /* don't remove mount point */ |
|
720 |
vn_vfsunlock(vp); |
|
721 |
return (EBUSY); |
|
722 |
} |
|
723 |
||
724 |
mutex_enter(&zp->z_lock); |
|
725 |
if (zp_is_dir && !zfs_dirempty(zp)) { /* dir not empty */ |
|
726 |
mutex_exit(&zp->z_lock); |
|
727 |
vn_vfsunlock(vp); |
|
728 |
return (EEXIST); |
|
729 |
} |
|
730 |
ASSERT(zp->z_phys->zp_links > zp_is_dir); |
|
731 |
if (--zp->z_phys->zp_links == zp_is_dir) { |
|
732 |
zp->z_reap = 1; |
|
733 |
zp->z_phys->zp_links = 0; |
|
734 |
reaped = 1; |
|
735 |
} else { |
|
736 |
zfs_time_stamper_locked(zp, STATE_CHANGED, tx); |
|
737 |
} |
|
738 |
mutex_exit(&zp->z_lock); |
|
739 |
vn_vfsunlock(vp); |
|
740 |
} |
|
741 |
||
742 |
dmu_buf_will_dirty(dzp->z_dbuf, tx); |
|
743 |
mutex_enter(&dzp->z_lock); |
|
744 |
dzp->z_phys->zp_size--; /* one dirent removed */ |
|
745 |
dzp->z_phys->zp_links -= zp_is_dir; /* ".." link from zp */ |
|
746 |
zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx); |
|
747 |
mutex_exit(&dzp->z_lock); |
|
748 |
||
749 |
error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, tx); |
|
750 |
ASSERT(error == 0); |
|
751 |
||
752 |
if (reaped_ptr != NULL) |
|
753 |
*reaped_ptr = reaped; |
|
754 |
else if (reaped) |
|
755 |
zfs_dq_add(zp, tx); |
|
756 |
||
757 |
return (0); |
|
758 |
} |
|
759 |
||
760 |
/* |
|
761 |
* Indicate whether the directory is empty. Works with or without z_lock |
|
762 |
* held, but can only be consider a hint in the latter case. Returns true |
|
763 |
* if only "." and ".." remain and there's no work in progress. |
|
764 |
*/ |
|
765 |
boolean_t |
|
766 |
zfs_dirempty(znode_t *dzp) |
|
767 |
{ |
|
768 |
return (dzp->z_phys->zp_size == 2 && dzp->z_dirlocks == 0); |
|
769 |
} |
|
770 |
||
771 |
int |
|
772 |
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) |
|
773 |
{ |
|
774 |
zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
|
775 |
znode_t *xzp; |
|
776 |
dmu_tx_t *tx; |
|
777 |
uint64_t xoid; |
|
778 |
int error; |
|
779 |
||
780 |
*xvpp = NULL; |
|
781 |
||
782 |
if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr)) |
|
783 |
return (error); |
|
784 |
||
785 |
tx = dmu_tx_create(zfsvfs->z_os); |
|
786 |
dmu_tx_hold_bonus(tx, zp->z_id); |
|
1544 | 787 |
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); |
789 | 788 |
error = dmu_tx_assign(tx, zfsvfs->z_assign); |
789 |
if (error) { |
|
2113
0510bb40c993
6430121 3-way deadlock involving tc_lock within zfs
ahrens
parents:
1878
diff
changeset
|
790 |
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) |
0510bb40c993
6430121 3-way deadlock involving tc_lock within zfs
ahrens
parents:
1878
diff
changeset
|
791 |
dmu_tx_wait(tx); |
789 | 792 |
dmu_tx_abort(tx); |
793 |
return (error); |
|
794 |
} |
|
795 |
zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0); |
|
796 |
ASSERT(xzp->z_id == xoid); |
|
797 |
ASSERT(xzp->z_phys->zp_parent == zp->z_id); |
|
798 |
dmu_buf_will_dirty(zp->z_dbuf, tx); |
|
799 |
zp->z_phys->zp_xattr = xoid; |
|
800 |
||
801 |
(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, ""); |
|
802 |
dmu_tx_commit(tx); |
|
803 |
||
804 |
*xvpp = ZTOV(xzp); |
|
805 |
||
806 |
return (0); |
|
807 |
} |
|
808 |
||
809 |
/* |
|
810 |
* Return a znode for the extended attribute directory for zp. |
|
811 |
* ** If the directory does not already exist, it is created ** |
|
812 |
* |
|
813 |
* IN: zp - znode to obtain attribute directory from |
|
814 |
* cr - credentials of caller |
|
815 |
* |
|
816 |
* OUT: xzpp - pointer to extended attribute znode |
|
817 |
* |
|
818 |
* RETURN: 0 on success |
|
819 |
* error number on failure |
|
820 |
*/ |
|
821 |
int |
|
822 |
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr) |
|
823 |
{ |
|
824 |
zfsvfs_t *zfsvfs = zp->z_zfsvfs; |
|
825 |
znode_t *xzp; |
|
826 |
zfs_dirlock_t *dl; |
|
827 |
vattr_t va; |
|
828 |
int error; |
|
829 |
top: |
|
830 |
error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR); |
|
831 |
if (error) |
|
832 |
return (error); |
|
833 |
||
834 |
if (xzp != NULL) { |
|
835 |
*xvpp = ZTOV(xzp); |
|
836 |
zfs_dirent_unlock(dl); |
|
837 |
return (0); |
|
838 |
} |
|
839 |
||
840 |
ASSERT(zp->z_phys->zp_xattr == 0); |
|
841 |
||
842 |
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { |
|
843 |
zfs_dirent_unlock(dl); |
|
844 |
return (EROFS); |
|
845 |
} |
|
846 |
||
847 |
/* |
|
848 |
* The ability to 'create' files in an attribute |
|
849 |
* directory comes from the write_xattr permission on the base file. |
|
850 |
* |
|
851 |
* The ability to 'search' an attribute directory requires |
|
852 |
* read_xattr permission on the base file. |
|
853 |
* |
|
854 |
* Once in a directory the ability to read/write attributes |
|
855 |
* is controlled by the permissions on the attribute file. |
|
856 |
*/ |
|
857 |
va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; |
|
858 |
va.va_type = VDIR; |
|
1231
64215f768e86
6354804 The file's ACL was changed when cp it from one ZFS file system to another one.
marks
parents:
885
diff
changeset
|
859 |
va.va_mode = S_IFDIR | S_ISVTX | 0777; |
789 | 860 |
va.va_uid = (uid_t)zp->z_phys->zp_uid; |
861 |
va.va_gid = (gid_t)zp->z_phys->zp_gid; |
|
862 |
||
863 |
error = zfs_make_xattrdir(zp, &va, xvpp, cr); |
|
864 |
zfs_dirent_unlock(dl); |
|
865 |
||
866 |
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { |
|
2113
0510bb40c993
6430121 3-way deadlock involving tc_lock within zfs
ahrens
parents:
1878
diff
changeset
|
867 |
/* NB: we already did dmu_tx_wait() if necessary */ |
789 | 868 |
goto top; |
869 |
} |
|
870 |
||
871 |
return (error); |
|
872 |
} |
|
873 |
||
874 |
/* |
|
875 |
* Decide whether it is okay to remove within a sticky directory. |
|
876 |
* |
|
877 |
* In sticky directories, write access is not sufficient; |
|
878 |
* you can remove entries from a directory only if: |
|
879 |
* |
|
880 |
* you own the directory, |
|
881 |
* you own the entry, |
|
882 |
* the entry is a plain file and you have write access, |
|
883 |
* or you are privileged (checked in secpolicy...). |
|
884 |
* |
|
885 |
* The function returns 0 if remove access is granted. |
|
886 |
*/ |
|
887 |
int |
|
888 |
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) |
|
889 |
{ |
|
890 |
uid_t uid; |
|
891 |
||
892 |
if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL) /* ZIL replay */ |
|
893 |
return (0); |
|
894 |
||
895 |
if ((zdp->z_phys->zp_mode & S_ISVTX) == 0 || |
|
896 |
(uid = crgetuid(cr)) == zdp->z_phys->zp_uid || |
|
897 |
uid == zp->z_phys->zp_uid || |
|
898 |
(ZTOV(zp)->v_type == VREG && |
|
899 |
zfs_zaccess(zp, ACE_WRITE_DATA, cr) == 0)) |
|
900 |
return (0); |
|
901 |
else |
|
902 |
return (secpolicy_vnode_remove(cr)); |
|
903 |
} |