author | perrin |
Wed, 09 Apr 2008 19:59:39 -0700 | |
changeset 6396 | 09c523979832 |
parent 6101 | 915df4cedbc9 |
child 6514 | 852c82a1989c |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1669 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
6101
915df4cedbc9
6658511 zl_itx_list_sz incorrect for WR_NEED_COPY writes
perrin
parents:
5435
diff
changeset
|
22 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/types.h> |
|
29 |
#include <sys/param.h> |
|
30 |
#include <sys/systm.h> |
|
31 |
#include <sys/sysmacros.h> |
|
32 |
#include <sys/cmn_err.h> |
|
33 |
#include <sys/kmem.h> |
|
34 |
#include <sys/thread.h> |
|
35 |
#include <sys/file.h> |
|
36 |
#include <sys/vfs.h> |
|
37 |
#include <sys/zfs_znode.h> |
|
38 |
#include <sys/zfs_dir.h> |
|
39 |
#include <sys/zil.h> |
|
4620 | 40 |
#include <sys/zil_impl.h> |
789 | 41 |
#include <sys/byteorder.h> |
42 |
#include <sys/policy.h> |
|
43 |
#include <sys/stat.h> |
|
44 |
#include <sys/mode.h> |
|
45 |
#include <sys/acl.h> |
|
46 |
#include <sys/dmu.h> |
|
47 |
#include <sys/spa.h> |
|
5331 | 48 |
#include <sys/zfs_fuid.h> |
789 | 49 |
#include <sys/ddi.h> |
50 |
||
51 |
/* |
|
52 |
* All the functions in this file are used to construct the log entries |
|
5331 | 53 |
 * to record transactions. They allocate an intent log transaction |
789 | 54 |
* structure (itx_t) and save within it all the information necessary to |
55 |
* possibly replay the transaction. The itx is then assigned a sequence |
|
56 |
* number and inserted in the in-memory list anchored in the zilog. |
|
57 |
*/ |
|
58 |
||
5331 | 59 |
int |
60 |
zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) |
|
61 |
{ |
|
62 |
int isxvattr = (vap->va_mask & AT_XVATTR); |
|
63 |
switch (type) { |
|
64 |
case Z_FILE: |
|
65 |
if (vsecp == NULL && !isxvattr) |
|
66 |
return (TX_CREATE); |
|
67 |
if (vsecp && isxvattr) |
|
68 |
return (TX_CREATE_ACL_ATTR); |
|
69 |
if (vsecp) |
|
70 |
return (TX_CREATE_ACL); |
|
71 |
else |
|
72 |
return (TX_CREATE_ATTR); |
|
73 |
/*NOTREACHED*/ |
|
74 |
case Z_DIR: |
|
75 |
if (vsecp == NULL && !isxvattr) |
|
76 |
return (TX_MKDIR); |
|
77 |
if (vsecp && isxvattr) |
|
78 |
return (TX_MKDIR_ACL_ATTR); |
|
79 |
if (vsecp) |
|
80 |
return (TX_MKDIR_ACL); |
|
81 |
else |
|
82 |
return (TX_MKDIR_ATTR); |
|
83 |
case Z_XATTRDIR: |
|
84 |
return (TX_MKXATTR); |
|
85 |
} |
|
86 |
ASSERT(0); |
|
87 |
return (TX_MAX_TYPE); |
|
88 |
} |
|
89 |
||
789 | 90 |
/* |
5331 | 91 |
* build up the log data necessary for logging xvattr_t |
92 |
* First lr_attr_t is initialized. following the lr_attr_t |
|
93 |
* is the mapsize and attribute bitmap copied from the xvattr_t. |
|
94 |
* Following the bitmap and bitmapsize two 64 bit words are reserved |
|
95 |
* for the create time which may be set. Following the create time |
|
96 |
* records a single 64 bit integer which has the bits to set on |
|
97 |
* replay for the xvattr. |
|
98 |
*/ |
|
99 |
static void |
|
100 |
zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) |
|
101 |
{ |
|
102 |
uint32_t *bitmap; |
|
103 |
uint64_t *attrs; |
|
104 |
uint64_t *crtime; |
|
105 |
xoptattr_t *xoap; |
|
106 |
void *scanstamp; |
|
107 |
int i; |
|
108 |
||
109 |
xoap = xva_getxoptattr(xvap); |
|
110 |
ASSERT(xoap); |
|
111 |
||
112 |
lrattr->lr_attr_masksize = xvap->xva_mapsize; |
|
113 |
bitmap = &lrattr->lr_attr_bitmap; |
|
114 |
for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) { |
|
115 |
*bitmap = xvap->xva_reqattrmap[i]; |
|
116 |
} |
|
117 |
||
118 |
/* Now pack the attributes up in a single uint64_t */ |
|
119 |
attrs = (uint64_t *)bitmap; |
|
120 |
crtime = attrs + 1; |
|
121 |
scanstamp = (caddr_t)(crtime + 2); |
|
122 |
*attrs = 0; |
|
123 |
if (XVA_ISSET_REQ(xvap, XAT_READONLY)) |
|
124 |
*attrs |= (xoap->xoa_readonly == 0) ? 0 : |
|
125 |
XAT0_READONLY; |
|
126 |
if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) |
|
127 |
*attrs |= (xoap->xoa_hidden == 0) ? 0 : |
|
128 |
XAT0_HIDDEN; |
|
129 |
if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) |
|
130 |
*attrs |= (xoap->xoa_system == 0) ? 0 : |
|
131 |
XAT0_SYSTEM; |
|
132 |
if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) |
|
133 |
*attrs |= (xoap->xoa_archive == 0) ? 0 : |
|
134 |
XAT0_ARCHIVE; |
|
135 |
if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) |
|
136 |
*attrs |= (xoap->xoa_immutable == 0) ? 0 : |
|
137 |
XAT0_IMMUTABLE; |
|
138 |
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) |
|
139 |
*attrs |= (xoap->xoa_nounlink == 0) ? 0 : |
|
140 |
XAT0_NOUNLINK; |
|
141 |
if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) |
|
142 |
*attrs |= (xoap->xoa_appendonly == 0) ? 0 : |
|
143 |
XAT0_APPENDONLY; |
|
144 |
if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) |
|
145 |
*attrs |= (xoap->xoa_opaque == 0) ? 0 : |
|
146 |
XAT0_APPENDONLY; |
|
147 |
if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) |
|
148 |
*attrs |= (xoap->xoa_nodump == 0) ? 0 : |
|
149 |
XAT0_NODUMP; |
|
150 |
if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) |
|
151 |
*attrs |= (xoap->xoa_av_quarantined == 0) ? 0 : |
|
152 |
XAT0_AV_QUARANTINED; |
|
153 |
if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) |
|
154 |
*attrs |= (xoap->xoa_av_modified == 0) ? 0 : |
|
155 |
XAT0_AV_MODIFIED; |
|
156 |
if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) |
|
157 |
ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); |
|
158 |
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) |
|
159 |
bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); |
|
160 |
} |
|
161 |
||
162 |
static void * |
|
163 |
zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start) |
|
164 |
{ |
|
165 |
zfs_fuid_t *zfuid; |
|
166 |
uint64_t *fuidloc = start; |
|
167 |
||
168 |
/* First copy in the ACE FUIDs */ |
|
169 |
for (zfuid = list_head(&fuidp->z_fuids); zfuid; |
|
170 |
zfuid = list_next(&fuidp->z_fuids, zfuid)) { |
|
171 |
*fuidloc++ = zfuid->z_logfuid; |
|
172 |
} |
|
173 |
return (fuidloc); |
|
174 |
} |
|
175 |
||
176 |
||
177 |
static void * |
|
178 |
zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) |
|
179 |
{ |
|
180 |
zfs_fuid_domain_t *zdomain; |
|
181 |
||
182 |
/* now copy in the domain info, if any */ |
|
183 |
if (fuidp->z_domain_str_sz != 0) { |
|
184 |
for (zdomain = list_head(&fuidp->z_domains); zdomain; |
|
185 |
zdomain = list_next(&fuidp->z_domains, zdomain)) { |
|
186 |
bcopy((void *)zdomain->z_domain, start, |
|
187 |
strlen(zdomain->z_domain) + 1); |
|
188 |
start = (caddr_t)start + |
|
189 |
strlen(zdomain->z_domain) + 1; |
|
190 |
} |
|
191 |
} |
|
192 |
return (start); |
|
193 |
} |
|
194 |
||
195 |
/* |
|
196 |
* zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, |
|
197 |
* TX_MKDIR_ATTR and TX_MKXATTR |
|
789 | 198 |
* transactions. |
5331 | 199 |
* |
200 |
* TX_CREATE and TX_MKDIR are standard creates, but they may have FUID |
|
201 |
* domain information appended prior to the name. In this case the |
|
202 |
* uid/gid in the log record will be a log centric FUID. |
|
203 |
* |
|
204 |
* TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that |
|
205 |
* may contain attributes, ACL and optional fuid information. |
|
206 |
* |
|
207 |
* TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify |
|
208 |
* and ACL and normal users/groups in the ACEs. |
|
209 |
* |
|
210 |
* There may be an optional xvattr attribute information similar |
|
211 |
* to zfs_log_setattr. |
|
212 |
* |
|
213 |
* Also, after the file name "domain" strings may be appended. |
|
789 | 214 |
*/ |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
215 |
void |
5331 | 216 |
zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
217 |
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp, |
|
218 |
zfs_fuid_info_t *fuidp, vattr_t *vap) |
|
789 | 219 |
{ |
220 |
itx_t *itx; |
|
221 |
uint64_t seq; |
|
222 |
lr_create_t *lr; |
|
5331 | 223 |
lr_acl_create_t *lracl; |
224 |
size_t aclsize; |
|
225 |
size_t xvatsize = 0; |
|
226 |
size_t txsize; |
|
227 |
xvattr_t *xvap = (xvattr_t *)vap; |
|
228 |
void *end; |
|
229 |
size_t lrsize; |
|
230 |
||
789 | 231 |
size_t namesize = strlen(name) + 1; |
5331 | 232 |
size_t fuidsz = 0; |
789 | 233 |
|
234 |
if (zilog == NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
235 |
return; |
789 | 236 |
|
5331 | 237 |
/* |
238 |
* If we have FUIDs present then add in space for |
|
239 |
* domains and ACE fuid's if any. |
|
240 |
*/ |
|
241 |
if (fuidp) { |
|
242 |
fuidsz += fuidp->z_domain_str_sz; |
|
243 |
fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); |
|
244 |
} |
|
245 |
||
246 |
if (vap->va_mask & AT_XVATTR) |
|
247 |
xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); |
|
248 |
||
249 |
if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || |
|
250 |
(int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || |
|
251 |
(int)txtype == TX_MKXATTR) { |
|
252 |
txsize = sizeof (*lr) + namesize + fuidsz + xvatsize; |
|
253 |
lrsize = sizeof (*lr); |
|
254 |
} else { |
|
255 |
aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0; |
|
256 |
txsize = |
|
257 |
sizeof (lr_acl_create_t) + namesize + fuidsz + |
|
5435 | 258 |
ZIL_ACE_LENGTH(aclsize) + xvatsize; |
5331 | 259 |
lrsize = sizeof (lr_acl_create_t); |
260 |
} |
|
261 |
||
262 |
itx = zil_itx_create(txtype, txsize); |
|
263 |
||
789 | 264 |
lr = (lr_create_t *)&itx->itx_lr; |
265 |
lr->lr_doid = dzp->z_id; |
|
266 |
lr->lr_foid = zp->z_id; |
|
267 |
lr->lr_mode = zp->z_phys->zp_mode; |
|
5331 | 268 |
if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) { |
269 |
lr->lr_uid = (uint64_t)zp->z_phys->zp_uid; |
|
270 |
} else { |
|
271 |
lr->lr_uid = fuidp->z_fuid_owner; |
|
272 |
} |
|
273 |
if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) { |
|
274 |
lr->lr_gid = (uint64_t)zp->z_phys->zp_gid; |
|
275 |
} else { |
|
276 |
lr->lr_gid = fuidp->z_fuid_group; |
|
277 |
} |
|
789 | 278 |
lr->lr_gen = zp->z_phys->zp_gen; |
279 |
lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; |
|
280 |
lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; |
|
281 |
lr->lr_rdev = zp->z_phys->zp_rdev; |
|
5331 | 282 |
|
283 |
/* |
|
284 |
* Fill in xvattr info if any |
|
285 |
*/ |
|
286 |
if (vap->va_mask & AT_XVATTR) { |
|
287 |
zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); |
|
288 |
end = (caddr_t)lr + lrsize + xvatsize; |
|
289 |
} else { |
|
290 |
end = (caddr_t)lr + lrsize; |
|
291 |
} |
|
292 |
||
293 |
/* Now fill in any ACL info */ |
|
294 |
||
295 |
if (vsecp) { |
|
296 |
lracl = (lr_acl_create_t *)&itx->itx_lr; |
|
297 |
lracl->lr_aclcnt = vsecp->vsa_aclcnt; |
|
298 |
lracl->lr_acl_bytes = aclsize; |
|
299 |
lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; |
|
300 |
lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; |
|
301 |
if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS) |
|
302 |
lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; |
|
303 |
else |
|
304 |
lracl->lr_acl_flags = 0; |
|
305 |
||
306 |
bcopy(vsecp->vsa_aclentp, end, aclsize); |
|
5435 | 307 |
end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize); |
5331 | 308 |
} |
309 |
||
310 |
/* drop in FUID info */ |
|
311 |
if (fuidp) { |
|
312 |
end = zfs_log_fuid_ids(fuidp, end); |
|
313 |
end = zfs_log_fuid_domains(fuidp, end); |
|
314 |
} |
|
315 |
/* |
|
316 |
* Now place file name in log record |
|
317 |
*/ |
|
318 |
bcopy(name, end, namesize); |
|
789 | 319 |
|
320 |
seq = zil_itx_assign(zilog, itx, tx); |
|
321 |
dzp->z_last_itx = seq; |
|
322 |
zp->z_last_itx = seq; |
|
323 |
} |
|
324 |
||
325 |
/* |
|
326 |
* zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. |
|
327 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
328 |
void |
5331 | 329 |
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
789 | 330 |
znode_t *dzp, char *name) |
331 |
{ |
|
332 |
itx_t *itx; |
|
333 |
uint64_t seq; |
|
334 |
lr_remove_t *lr; |
|
335 |
size_t namesize = strlen(name) + 1; |
|
336 |
||
337 |
if (zilog == NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
338 |
return; |
789 | 339 |
|
340 |
itx = zil_itx_create(txtype, sizeof (*lr) + namesize); |
|
341 |
lr = (lr_remove_t *)&itx->itx_lr; |
|
342 |
lr->lr_doid = dzp->z_id; |
|
343 |
bcopy(name, (char *)(lr + 1), namesize); |
|
344 |
||
345 |
seq = zil_itx_assign(zilog, itx, tx); |
|
346 |
dzp->z_last_itx = seq; |
|
347 |
} |
|
348 |
||
349 |
/* |
|
350 |
* zfs_log_link() handles TX_LINK transactions. |
|
351 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
352 |
void |
5331 | 353 |
zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
789 | 354 |
znode_t *dzp, znode_t *zp, char *name) |
355 |
{ |
|
356 |
itx_t *itx; |
|
357 |
uint64_t seq; |
|
358 |
lr_link_t *lr; |
|
359 |
size_t namesize = strlen(name) + 1; |
|
360 |
||
361 |
if (zilog == NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
362 |
return; |
789 | 363 |
|
364 |
itx = zil_itx_create(txtype, sizeof (*lr) + namesize); |
|
365 |
lr = (lr_link_t *)&itx->itx_lr; |
|
366 |
lr->lr_doid = dzp->z_id; |
|
367 |
lr->lr_link_obj = zp->z_id; |
|
368 |
bcopy(name, (char *)(lr + 1), namesize); |
|
369 |
||
370 |
seq = zil_itx_assign(zilog, itx, tx); |
|
371 |
dzp->z_last_itx = seq; |
|
372 |
zp->z_last_itx = seq; |
|
373 |
} |
|
374 |
||
375 |
/* |
|
376 |
* zfs_log_symlink() handles TX_SYMLINK transactions. |
|
377 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
378 |
void |
5331 | 379 |
zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
380 |
znode_t *dzp, znode_t *zp, char *name, char *link) |
|
789 | 381 |
{ |
382 |
itx_t *itx; |
|
383 |
uint64_t seq; |
|
384 |
lr_create_t *lr; |
|
385 |
size_t namesize = strlen(name) + 1; |
|
386 |
size_t linksize = strlen(link) + 1; |
|
387 |
||
388 |
if (zilog == NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
389 |
return; |
789 | 390 |
|
391 |
itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); |
|
392 |
lr = (lr_create_t *)&itx->itx_lr; |
|
393 |
lr->lr_doid = dzp->z_id; |
|
394 |
lr->lr_foid = zp->z_id; |
|
395 |
lr->lr_mode = zp->z_phys->zp_mode; |
|
396 |
lr->lr_uid = zp->z_phys->zp_uid; |
|
397 |
lr->lr_gid = zp->z_phys->zp_gid; |
|
398 |
lr->lr_gen = zp->z_phys->zp_gen; |
|
399 |
lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; |
|
400 |
lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; |
|
401 |
bcopy(name, (char *)(lr + 1), namesize); |
|
402 |
bcopy(link, (char *)(lr + 1) + namesize, linksize); |
|
403 |
||
404 |
seq = zil_itx_assign(zilog, itx, tx); |
|
405 |
dzp->z_last_itx = seq; |
|
406 |
zp->z_last_itx = seq; |
|
407 |
} |
|
408 |
||
409 |
/* |
|
410 |
* zfs_log_rename() handles TX_RENAME transactions. |
|
411 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
412 |
void |
5331 | 413 |
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, |
789 | 414 |
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) |
415 |
{ |
|
416 |
itx_t *itx; |
|
417 |
uint64_t seq; |
|
418 |
lr_rename_t *lr; |
|
419 |
size_t snamesize = strlen(sname) + 1; |
|
420 |
size_t dnamesize = strlen(dname) + 1; |
|
421 |
||
422 |
if (zilog == NULL) |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
423 |
return; |
789 | 424 |
|
425 |
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); |
|
426 |
lr = (lr_rename_t *)&itx->itx_lr; |
|
427 |
lr->lr_sdoid = sdzp->z_id; |
|
428 |
lr->lr_tdoid = tdzp->z_id; |
|
429 |
bcopy(sname, (char *)(lr + 1), snamesize); |
|
430 |
bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); |
|
431 |
||
432 |
seq = zil_itx_assign(zilog, itx, tx); |
|
433 |
sdzp->z_last_itx = seq; |
|
434 |
tdzp->z_last_itx = seq; |
|
435 |
szp->z_last_itx = seq; |
|
436 |
} |
|
437 |
||
438 |
/* |
|
439 |
* zfs_log_write() handles TX_WRITE transactions. |
|
440 |
*/ |
|
2237 | 441 |
ssize_t zfs_immediate_write_sz = 32768; |
789 | 442 |
|
4620 | 443 |
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \ |
444 |
sizeof (lr_write_t)) |
|
445 |
||
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
446 |
void |
789 | 447 |
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
4620 | 448 |
znode_t *zp, offset_t off, ssize_t resid, int ioflag) |
789 | 449 |
{ |
1669 | 450 |
itx_wr_state_t write_state; |
4620 | 451 |
boolean_t slogging; |
4720
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
452 |
uintptr_t fsync_cnt; |
789 | 453 |
|
3461 | 454 |
if (zilog == NULL || zp->z_unlinked) |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
455 |
return; |
789 | 456 |
|
1669 | 457 |
/* |
458 |
* Writes are handled in three different ways: |
|
459 |
* |
|
460 |
* WR_INDIRECT: |
|
4620 | 461 |
* If the write is greater than zfs_immediate_write_sz and there are |
462 |
* no separate logs in this pool then later *if* we need to log the |
|
463 |
* write then dmu_sync() is used to immediately write the block and |
|
464 |
* its block pointer is put in the log record. |
|
1669 | 465 |
* WR_COPIED: |
466 |
* If we know we'll immediately be committing the |
|
6396
09c523979832
6683293 concurrent O_DSYNC writes to a fileset can be much improved over NFS
perrin
parents:
6101
diff
changeset
|
467 |
* transaction (FSYNC or FDSYNC), the we allocate a larger |
1669 | 468 |
* log record here for the data and copy the data in. |
469 |
* WR_NEED_COPY: |
|
470 |
* Otherwise we don't allocate a buffer, and *if* we need to |
|
471 |
* flush the write later then a buffer is allocated and |
|
472 |
* we retrieve the data using the dmu. |
|
473 |
*/ |
|
4620 | 474 |
slogging = spa_has_slogs(zilog->zl_spa); |
475 |
if (resid > zfs_immediate_write_sz && !slogging) |
|
1669 | 476 |
write_state = WR_INDIRECT; |
6396
09c523979832
6683293 concurrent O_DSYNC writes to a fileset can be much improved over NFS
perrin
parents:
6101
diff
changeset
|
477 |
else if (ioflag & (FSYNC | FDSYNC)) |
1669 | 478 |
write_state = WR_COPIED; |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
479 |
else |
1669 | 480 |
write_state = WR_NEED_COPY; |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
481 |
|
4720
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
482 |
if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) { |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
483 |
(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
484 |
} |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
485 |
|
4620 | 486 |
while (resid) { |
487 |
itx_t *itx; |
|
488 |
lr_write_t *lr; |
|
489 |
ssize_t len; |
|
490 |
||
491 |
/* |
|
492 |
* If there are slogs and the write would overflow the largest |
|
493 |
* block, then because we don't want to use the main pool |
|
494 |
* to dmu_sync, we have to split the write. |
|
495 |
*/ |
|
496 |
if (slogging && resid > ZIL_MAX_LOG_DATA) |
|
497 |
len = SPA_MAXBLOCKSIZE >> 1; |
|
498 |
else |
|
499 |
len = resid; |
|
500 |
||
501 |
itx = zil_itx_create(txtype, sizeof (*lr) + |
|
502 |
(write_state == WR_COPIED ? len : 0)); |
|
503 |
lr = (lr_write_t *)&itx->itx_lr; |
|
504 |
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os, |
|
505 |
zp->z_id, off, len, lr + 1) != 0) { |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
506 |
kmem_free(itx, offsetof(itx_t, itx_lr) + |
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
507 |
itx->itx_lr.lrc_reclen); |
1669 | 508 |
itx = zil_itx_create(txtype, sizeof (*lr)); |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
509 |
lr = (lr_write_t *)&itx->itx_lr; |
1669 | 510 |
write_state = WR_NEED_COPY; |
511 |
} |
|
4620 | 512 |
|
513 |
itx->itx_wr_state = write_state; |
|
6101
915df4cedbc9
6658511 zl_itx_list_sz incorrect for WR_NEED_COPY writes
perrin
parents:
5435
diff
changeset
|
514 |
if (write_state == WR_NEED_COPY) |
915df4cedbc9
6658511 zl_itx_list_sz incorrect for WR_NEED_COPY writes
perrin
parents:
5435
diff
changeset
|
515 |
itx->itx_sod += len; |
4620 | 516 |
lr->lr_foid = zp->z_id; |
517 |
lr->lr_offset = off; |
|
518 |
lr->lr_length = len; |
|
519 |
lr->lr_blkoff = 0; |
|
520 |
BP_ZERO(&lr->lr_blkptr); |
|
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
521 |
|
4620 | 522 |
itx->itx_private = zp->z_zfsvfs; |
789 | 523 |
|
6396
09c523979832
6683293 concurrent O_DSYNC writes to a fileset can be much improved over NFS
perrin
parents:
6101
diff
changeset
|
524 |
if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) || |
09c523979832
6683293 concurrent O_DSYNC writes to a fileset can be much improved over NFS
perrin
parents:
6101
diff
changeset
|
525 |
(ioflag & (FSYNC | FDSYNC))) |
4720
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
526 |
itx->itx_sync = B_TRUE; |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
527 |
else |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
528 |
itx->itx_sync = B_FALSE; |
8edc0d2e6f3f
6535160 Lock contention on zl_lock from zil_commit
fr157268
parents:
4620
diff
changeset
|
529 |
|
4620 | 530 |
zp->z_last_itx = zil_itx_assign(zilog, itx, tx); |
789 | 531 |
|
4620 | 532 |
off += len; |
533 |
resid -= len; |
|
534 |
} |
|
789 | 535 |
} |
536 |
||
537 |
/* |
|
538 |
* zfs_log_truncate() handles TX_TRUNCATE transactions. |
|
539 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
540 |
void |
789 | 541 |
zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
542 |
znode_t *zp, uint64_t off, uint64_t len) |
|
543 |
{ |
|
544 |
itx_t *itx; |
|
545 |
uint64_t seq; |
|
546 |
lr_truncate_t *lr; |
|
547 |
||
3461 | 548 |
if (zilog == NULL || zp->z_unlinked) |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
549 |
return; |
789 | 550 |
|
551 |
itx = zil_itx_create(txtype, sizeof (*lr)); |
|
552 |
lr = (lr_truncate_t *)&itx->itx_lr; |
|
553 |
lr->lr_foid = zp->z_id; |
|
554 |
lr->lr_offset = off; |
|
555 |
lr->lr_length = len; |
|
556 |
||
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2638
diff
changeset
|
557 |
itx->itx_sync = (zp->z_sync_cnt != 0); |
789 | 558 |
seq = zil_itx_assign(zilog, itx, tx); |
559 |
zp->z_last_itx = seq; |
|
560 |
} |
|
561 |
||
562 |
/* |
|
563 |
* zfs_log_setattr() handles TX_SETATTR transactions. |
|
564 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
565 |
void |
789 | 566 |
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
5331 | 567 |
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) |
789 | 568 |
{ |
5331 | 569 |
itx_t *itx; |
570 |
uint64_t seq; |
|
571 |
lr_setattr_t *lr; |
|
572 |
xvattr_t *xvap = (xvattr_t *)vap; |
|
573 |
size_t recsize = sizeof (lr_setattr_t); |
|
574 |
void *start; |
|
575 |
||
789 | 576 |
|
3461 | 577 |
if (zilog == NULL || zp->z_unlinked) |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
578 |
return; |
789 | 579 |
|
5331 | 580 |
/* |
581 |
* If XVATTR set, then log record size needs to allow |
|
582 |
* for lr_attr_t + xvattr mask, mapsize and create time |
|
583 |
* plus actual attribute values |
|
584 |
*/ |
|
585 |
if (vap->va_mask & AT_XVATTR) |
|
586 |
recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); |
|
587 |
||
588 |
if (fuidp) |
|
589 |
recsize += fuidp->z_domain_str_sz; |
|
590 |
||
591 |
itx = zil_itx_create(txtype, recsize); |
|
789 | 592 |
lr = (lr_setattr_t *)&itx->itx_lr; |
593 |
lr->lr_foid = zp->z_id; |
|
594 |
lr->lr_mask = (uint64_t)mask_applied; |
|
595 |
lr->lr_mode = (uint64_t)vap->va_mode; |
|
5331 | 596 |
if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) |
597 |
lr->lr_uid = fuidp->z_fuid_owner; |
|
598 |
else |
|
599 |
lr->lr_uid = (uint64_t)vap->va_uid; |
|
600 |
||
601 |
if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) |
|
602 |
lr->lr_gid = fuidp->z_fuid_group; |
|
603 |
else |
|
604 |
lr->lr_gid = (uint64_t)vap->va_gid; |
|
605 |
||
789 | 606 |
lr->lr_size = (uint64_t)vap->va_size; |
607 |
ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); |
|
608 |
ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); |
|
5331 | 609 |
start = (lr_setattr_t *)(lr + 1); |
610 |
if (vap->va_mask & AT_XVATTR) { |
|
611 |
zfs_log_xvattr((lr_attr_t *)start, xvap); |
|
612 |
start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); |
|
613 |
} |
|
614 |
||
615 |
/* |
|
616 |
* Now stick on domain information if any on end |
|
617 |
*/ |
|
618 |
||
619 |
if (fuidp) |
|
620 |
(void) zfs_log_fuid_domains(fuidp, start); |
|
789 | 621 |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2638
diff
changeset
|
622 |
itx->itx_sync = (zp->z_sync_cnt != 0); |
789 | 623 |
seq = zil_itx_assign(zilog, itx, tx); |
624 |
zp->z_last_itx = seq; |
|
625 |
} |
|
626 |
||
627 |
/* |
|
628 |
* zfs_log_acl() handles TX_ACL transactions. |
|
629 |
*/ |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
630 |
void |
5331 | 631 |
zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, |
632 |
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp) |
|
789 | 633 |
{ |
634 |
itx_t *itx; |
|
635 |
uint64_t seq; |
|
5331 | 636 |
lr_acl_v0_t *lrv0; |
789 | 637 |
lr_acl_t *lr; |
5331 | 638 |
int txtype; |
639 |
int lrsize; |
|
640 |
size_t txsize; |
|
641 |
size_t aclbytes = vsecp->vsa_aclentsz; |
|
642 |
||
643 |
txtype = (zp->z_zfsvfs->z_version == ZPL_VERSION_INITIAL) ? |
|
644 |
TX_ACL_V0 : TX_ACL; |
|
645 |
||
646 |
if (txtype == TX_ACL) |
|
647 |
lrsize = sizeof (*lr); |
|
648 |
else |
|
649 |
lrsize = sizeof (*lrv0); |
|
789 | 650 |
|
3461 | 651 |
if (zilog == NULL || zp->z_unlinked) |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2237
diff
changeset
|
652 |
return; |
789 | 653 |
|
5435 | 654 |
txsize = lrsize + |
655 |
((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) + |
|
656 |
(fuidp ? fuidp->z_domain_str_sz : 0) + |
|
5331 | 657 |
sizeof (uint64) * (fuidp ? fuidp->z_fuid_cnt : 0); |
658 |
||
659 |
itx = zil_itx_create(txtype, txsize); |
|
660 |
||
789 | 661 |
lr = (lr_acl_t *)&itx->itx_lr; |
662 |
lr->lr_foid = zp->z_id; |
|
5331 | 663 |
if (txtype == TX_ACL) { |
664 |
lr->lr_acl_bytes = aclbytes; |
|
665 |
lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; |
|
666 |
lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; |
|
667 |
if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) |
|
668 |
lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; |
|
669 |
else |
|
670 |
lr->lr_acl_flags = 0; |
|
671 |
} |
|
672 |
lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt; |
|
673 |
||
674 |
if (txtype == TX_ACL_V0) { |
|
675 |
lrv0 = (lr_acl_v0_t *)lr; |
|
676 |
bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes); |
|
677 |
} else { |
|
678 |
void *start = (ace_t *)(lr + 1); |
|
679 |
||
680 |
bcopy(vsecp->vsa_aclentp, start, aclbytes); |
|
681 |
||
5435 | 682 |
start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); |
5331 | 683 |
|
684 |
if (fuidp) { |
|
685 |
start = zfs_log_fuid_ids(fuidp, start); |
|
686 |
(void) zfs_log_fuid_domains(fuidp, start); |
|
687 |
} |
|
688 |
} |
|
789 | 689 |
|
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2638
diff
changeset
|
690 |
itx->itx_sync = (zp->z_sync_cnt != 0); |
789 | 691 |
seq = zil_itx_assign(zilog, itx, tx); |
692 |
zp->z_last_itx = seq; |
|
693 |
} |