author | Eric Schrock <Eric.Schrock@Sun.COM> |
Thu, 05 Nov 2009 08:59:19 -0800 | |
changeset 10960 | dcc7d6f9faa8 |
parent 10922 | e2081f502306 |
child 11249 | 6c30f7dfc97b |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1816
8c14b56c8515
6408482 64-bit system can't read some 32-bit dev_ts created on zfs
marks
parents:
789
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
8c14b56c8515
6408482 64-bit system can't read some 32-bit dev_ts created on zfs
marks
parents:
789
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
10793
34709091de6d
6886081 Solaris needs reparse point support (PSARC 2009/387)
Dai Ngo <dai.ngo@sun.com>
parents:
6514
diff
changeset
|
22 |
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#include <sys/types.h> |
|
27 |
#include <sys/param.h> |
|
28 |
#include <sys/systm.h> |
|
29 |
#include <sys/sysmacros.h> |
|
30 |
#include <sys/cmn_err.h> |
|
31 |
#include <sys/kmem.h> |
|
32 |
#include <sys/thread.h> |
|
33 |
#include <sys/file.h> |
|
34 |
#include <sys/fcntl.h> |
|
35 |
#include <sys/vfs.h> |
|
36 |
#include <sys/fs/zfs.h> |
|
37 |
#include <sys/zfs_znode.h> |
|
38 |
#include <sys/zfs_dir.h> |
|
39 |
#include <sys/zfs_acl.h> |
|
5331 | 40 |
#include <sys/zfs_fuid.h> |
789 | 41 |
#include <sys/spa.h> |
42 |
#include <sys/zil.h> |
|
43 |
#include <sys/byteorder.h> |
|
44 |
#include <sys/stat.h> |
|
45 |
#include <sys/mode.h> |
|
46 |
#include <sys/acl.h> |
|
47 |
#include <sys/atomic.h> |
|
48 |
#include <sys/cred.h> |
|
49 |
||
50 |
/* |
|
51 |
* Functions to replay ZFS intent log (ZIL) records |
|
52 |
* The functions are called through a function vector (zfs_replay_vector) |
|
53 |
* which is indexed by the transaction type. |
|
54 |
*/ |
|
55 |
||
56 |
static void |
|
57 |
zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, |
|
58 |
uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) |
|
59 |
{ |
|
60 |
bzero(vap, sizeof (*vap)); |
|
61 |
vap->va_mask = (uint_t)mask; |
|
62 |
vap->va_type = IFTOVT(mode); |
|
63 |
vap->va_mode = mode & MODEMASK; |
|
5331 | 64 |
vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; |
65 |
vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; |
|
1816
8c14b56c8515
6408482 64-bit system can't read some 32-bit dev_ts created on zfs
marks
parents:
789
diff
changeset
|
66 |
vap->va_rdev = zfs_cmpldev(rdev); |
789 | 67 |
vap->va_nodeid = nodeid; |
68 |
} |
|
69 |
||
70 |
/* ARGSUSED */ |
|
71 |
static int |
|
72 |
zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap) |
|
73 |
{ |
|
74 |
return (ENOTSUP); |
|
75 |
} |
|
76 |
||
5331 | 77 |
static void |
78 |
zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) |
|
79 |
{ |
|
80 |
xoptattr_t *xoap = NULL; |
|
81 |
uint64_t *attrs; |
|
82 |
uint64_t *crtime; |
|
5435 | 83 |
uint32_t *bitmap; |
5331 | 84 |
void *scanstamp; |
5435 | 85 |
int i; |
5331 | 86 |
|
87 |
xvap->xva_vattr.va_mask |= AT_XVATTR; |
|
88 |
if ((xoap = xva_getxoptattr(xvap)) == NULL) { |
|
89 |
xvap->xva_vattr.va_mask &= ~AT_XVATTR; /* shouldn't happen */ |
|
90 |
return; |
|
91 |
} |
|
92 |
||
93 |
ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize); |
|
5435 | 94 |
|
95 |
bitmap = &lrattr->lr_attr_bitmap; |
|
96 |
for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++) |
|
97 |
xvap->xva_reqattrmap[i] = *bitmap; |
|
98 |
||
5331 | 99 |
attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1); |
100 |
crtime = attrs + 1; |
|
101 |
scanstamp = (caddr_t)(crtime + 2); |
|
102 |
||
103 |
if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) |
|
104 |
xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0); |
|
105 |
if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) |
|
106 |
xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0); |
|
107 |
if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) |
|
108 |
xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0); |
|
109 |
if (XVA_ISSET_REQ(xvap, XAT_READONLY)) |
|
110 |
xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0); |
|
111 |
if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) |
|
112 |
xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0); |
|
113 |
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) |
|
114 |
xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0); |
|
115 |
if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) |
|
116 |
xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0); |
|
117 |
if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) |
|
118 |
xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0); |
|
119 |
if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) |
|
120 |
xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0); |
|
121 |
if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) |
|
122 |
xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0); |
|
123 |
if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) |
|
124 |
xoap->xoa_av_quarantined = |
|
125 |
((*attrs & XAT0_AV_QUARANTINED) != 0); |
|
126 |
if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) |
|
127 |
ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); |
|
128 |
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) |
|
129 |
bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ); |
|
10793
34709091de6d
6886081 Solaris needs reparse point support (PSARC 2009/387)
Dai Ngo <dai.ngo@sun.com>
parents:
6514
diff
changeset
|
130 |
if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) |
34709091de6d
6886081 Solaris needs reparse point support (PSARC 2009/387)
Dai Ngo <dai.ngo@sun.com>
parents:
6514
diff
changeset
|
131 |
xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); |
5331 | 132 |
} |
133 |
||
789 | 134 |
/*
 * Count the distinct non-zero FUID domain indexes referenced by a
 * uid/gid pair.  The result is 0, 1 or 2: one for a non-zero uid index,
 * plus one more if the gid index is non-zero and differs from the uid's.
 */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
{
	uint64_t owner_idx = FUID_INDEX(uid);
	uint64_t group_idx = FUID_INDEX(gid);
	int count = (owner_idx != 0) ? 1 : 0;

	if (group_idx != 0 && group_idx != owner_idx)
		count++;

	return (count);
}
|
150 |
||
151 |
static void * |
|
152 |
zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start, |
|
153 |
int domcnt) |
|
154 |
{ |
|
155 |
int i; |
|
156 |
||
157 |
for (i = 0; i != domcnt; i++) { |
|
158 |
fuid_infop->z_domain_table[i] = start; |
|
159 |
start = (caddr_t)start + strlen(start) + 1; |
|
160 |
} |
|
161 |
||
162 |
return (start); |
|
163 |
} |
|
164 |
||
165 |
/* |
|
166 |
* Set the uid/gid in the fuid_info structure. |
|
167 |
*/ |
|
168 |
static void |
|
169 |
zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid) |
|
789 | 170 |
{ |
5331 | 171 |
/* |
172 |
* If owner or group are log specific FUIDs then slurp up |
|
173 |
* domain information and build zfs_fuid_info_t |
|
174 |
*/ |
|
175 |
if (IS_EPHEMERAL(uid)) |
|
176 |
fuid_infop->z_fuid_owner = uid; |
|
177 |
||
178 |
if (IS_EPHEMERAL(gid)) |
|
179 |
fuid_infop->z_fuid_group = gid; |
|
180 |
} |
|
181 |
||
182 |
/* |
|
183 |
* Load fuid domains into fuid_info_t |
|
184 |
*/ |
|
185 |
static zfs_fuid_info_t * |
|
186 |
zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid) |
|
187 |
{ |
|
188 |
int domcnt; |
|
189 |
||
190 |
zfs_fuid_info_t *fuid_infop; |
|
191 |
||
192 |
fuid_infop = zfs_fuid_info_alloc(); |
|
193 |
||
194 |
domcnt = zfs_replay_domain_cnt(uid, gid); |
|
195 |
||
196 |
if (domcnt == 0) |
|
197 |
return (fuid_infop); |
|
198 |
||
199 |
fuid_infop->z_domain_table = |
|
200 |
kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); |
|
201 |
||
202 |
zfs_replay_fuid_ugid(fuid_infop, uid, gid); |
|
203 |
||
204 |
fuid_infop->z_domain_cnt = domcnt; |
|
205 |
*end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt); |
|
206 |
return (fuid_infop); |
|
207 |
} |
|
208 |
||
209 |
/* |
|
210 |
* load zfs_fuid_t's and fuid_domains into fuid_info_t |
|
211 |
*/ |
|
212 |
static zfs_fuid_info_t * |
|
213 |
zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid, |
|
214 |
uint64_t gid) |
|
215 |
{ |
|
216 |
uint64_t *log_fuid = (uint64_t *)start; |
|
217 |
zfs_fuid_info_t *fuid_infop; |
|
218 |
int i; |
|
219 |
||
220 |
fuid_infop = zfs_fuid_info_alloc(); |
|
221 |
fuid_infop->z_domain_cnt = domcnt; |
|
222 |
||
223 |
fuid_infop->z_domain_table = |
|
224 |
kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); |
|
225 |
||
226 |
for (i = 0; i != idcnt; i++) { |
|
227 |
zfs_fuid_t *zfuid; |
|
228 |
||
229 |
zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); |
|
230 |
zfuid->z_logfuid = *log_fuid; |
|
231 |
zfuid->z_id = -1; |
|
232 |
zfuid->z_domidx = 0; |
|
233 |
list_insert_tail(&fuid_infop->z_fuids, zfuid); |
|
234 |
log_fuid++; |
|
235 |
} |
|
236 |
||
237 |
zfs_replay_fuid_ugid(fuid_infop, uid, gid); |
|
238 |
||
239 |
*end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt); |
|
240 |
return (fuid_infop); |
|
241 |
} |
|
242 |
||
243 |
static void |
|
244 |
zfs_replay_swap_attrs(lr_attr_t *lrattr) |
|
245 |
{ |
|
246 |
/* swap the lr_attr structure */ |
|
247 |
byteswap_uint32_array(lrattr, sizeof (*lrattr)); |
|
248 |
/* swap the bitmap */ |
|
5435 | 249 |
byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) * |
250 |
sizeof (uint32_t)); |
|
5331 | 251 |
/* swap the attributes, create time + 64 bit word for attributes */ |
5435 | 252 |
byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) * |
5331 | 253 |
(lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t)); |
254 |
} |
|
255 |
||
256 |
/* |
|
257 |
* Replay file create with optional ACL, xvattr information as well |
|
258 |
* as option FUID information. |
|
259 |
*/ |
|
260 |
static int |
|
261 |
zfs_replay_create_acl(zfsvfs_t *zfsvfs, |
|
262 |
lr_acl_create_t *lracl, boolean_t byteswap) |
|
263 |
{ |
|
264 |
char *name = NULL; /* location determined later */ |
|
265 |
lr_create_t *lr = (lr_create_t *)lracl; |
|
789 | 266 |
znode_t *dzp; |
267 |
vnode_t *vp = NULL; |
|
5331 | 268 |
xvattr_t xva; |
269 |
int vflg = 0; |
|
270 |
vsecattr_t vsec = { 0 }; |
|
271 |
lr_attr_t *lrattr; |
|
272 |
void *aclstart; |
|
273 |
void *fuidstart; |
|
274 |
size_t xvatlen = 0; |
|
275 |
uint64_t txtype; |
|
789 | 276 |
int error; |
277 |
||
5331 | 278 |
if (byteswap) { |
279 |
byteswap_uint64_array(lracl, sizeof (*lracl)); |
|
280 |
txtype = (int)lr->lr_common.lrc_txtype; |
|
281 |
if (txtype == TX_CREATE_ACL_ATTR || |
|
282 |
txtype == TX_MKDIR_ACL_ATTR) { |
|
5435 | 283 |
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); |
5331 | 284 |
zfs_replay_swap_attrs(lrattr); |
285 |
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); |
|
286 |
} |
|
287 |
||
288 |
aclstart = (caddr_t)(lracl + 1) + xvatlen; |
|
289 |
zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); |
|
290 |
/* swap fuids */ |
|
291 |
if (lracl->lr_fuidcnt) { |
|
292 |
byteswap_uint64_array((caddr_t)aclstart + |
|
5435 | 293 |
ZIL_ACE_LENGTH(lracl->lr_acl_bytes), |
294 |
lracl->lr_fuidcnt * sizeof (uint64_t)); |
|
5331 | 295 |
} |
296 |
} |
|
789 | 297 |
|
298 |
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
|
299 |
return (error); |
|
300 |
||
5331 | 301 |
xva_init(&xva); |
302 |
zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, |
|
789 | 303 |
lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); |
304 |
||
305 |
/* |
|
306 |
* All forms of zfs create (create, mkdir, mkxattrdir, symlink) |
|
307 |
* eventually end up in zfs_mknode(), which assigns the object's |
|
308 |
* creation time and generation number. The generic VOP_CREATE() |
|
309 |
* doesn't have either concept, so we smuggle the values inside |
|
310 |
* the vattr's otherwise unused va_ctime and va_nblocks fields. |
|
311 |
*/ |
|
5331 | 312 |
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); |
313 |
xva.xva_vattr.va_nblocks = lr->lr_gen; |
|
314 |
||
315 |
error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); |
|
316 |
if (error != ENOENT) |
|
317 |
goto bail; |
|
318 |
||
319 |
if (lr->lr_common.lrc_txtype & TX_CI) |
|
320 |
vflg |= FIGNORECASE; |
|
321 |
switch ((int)lr->lr_common.lrc_txtype) { |
|
322 |
case TX_CREATE_ACL: |
|
323 |
aclstart = (caddr_t)(lracl + 1); |
|
5435 | 324 |
fuidstart = (caddr_t)aclstart + |
325 |
ZIL_ACE_LENGTH(lracl->lr_acl_bytes); |
|
5331 | 326 |
zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, |
327 |
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
|
328 |
lr->lr_uid, lr->lr_gid); |
|
329 |
/*FALLTHROUGH*/ |
|
330 |
case TX_CREATE_ACL_ATTR: |
|
331 |
if (name == NULL) { |
|
332 |
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); |
|
333 |
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); |
|
334 |
xva.xva_vattr.va_mask |= AT_XVATTR; |
|
335 |
zfs_replay_xvattr(lrattr, &xva); |
|
336 |
} |
|
337 |
vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; |
|
338 |
vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; |
|
339 |
vsec.vsa_aclcnt = lracl->lr_aclcnt; |
|
340 |
vsec.vsa_aclentsz = lracl->lr_acl_bytes; |
|
341 |
vsec.vsa_aclflags = lracl->lr_acl_flags; |
|
5833
1a2bbcec3b3f
6649841 zfs_replay_create_acl() has improper indentation
marks
parents:
5435
diff
changeset
|
342 |
if (zfsvfs->z_fuid_replay == NULL) { |
5331 | 343 |
fuidstart = (caddr_t)(lracl + 1) + xvatlen + |
5435 | 344 |
ZIL_ACE_LENGTH(lracl->lr_acl_bytes); |
5331 | 345 |
zfsvfs->z_fuid_replay = |
346 |
zfs_replay_fuids(fuidstart, |
|
347 |
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
|
348 |
lr->lr_uid, lr->lr_gid); |
|
5833
1a2bbcec3b3f
6649841 zfs_replay_create_acl() has improper indentation
marks
parents:
5435
diff
changeset
|
349 |
} |
5331 | 350 |
|
351 |
error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, |
|
352 |
0, 0, &vp, kcred, vflg, NULL, &vsec); |
|
353 |
break; |
|
354 |
case TX_MKDIR_ACL: |
|
355 |
aclstart = (caddr_t)(lracl + 1); |
|
5435 | 356 |
fuidstart = (caddr_t)aclstart + |
357 |
ZIL_ACE_LENGTH(lracl->lr_acl_bytes); |
|
5331 | 358 |
zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, |
359 |
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
|
360 |
lr->lr_uid, lr->lr_gid); |
|
361 |
/*FALLTHROUGH*/ |
|
362 |
case TX_MKDIR_ACL_ATTR: |
|
363 |
if (name == NULL) { |
|
364 |
lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); |
|
365 |
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); |
|
366 |
zfs_replay_xvattr(lrattr, &xva); |
|
367 |
} |
|
368 |
vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; |
|
369 |
vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; |
|
370 |
vsec.vsa_aclcnt = lracl->lr_aclcnt; |
|
371 |
vsec.vsa_aclentsz = lracl->lr_acl_bytes; |
|
372 |
vsec.vsa_aclflags = lracl->lr_acl_flags; |
|
5833
1a2bbcec3b3f
6649841 zfs_replay_create_acl() has improper indentation
marks
parents:
5435
diff
changeset
|
373 |
if (zfsvfs->z_fuid_replay == NULL) { |
5331 | 374 |
fuidstart = (caddr_t)(lracl + 1) + xvatlen + |
5435 | 375 |
ZIL_ACE_LENGTH(lracl->lr_acl_bytes); |
5331 | 376 |
zfsvfs->z_fuid_replay = |
377 |
zfs_replay_fuids(fuidstart, |
|
378 |
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, |
|
379 |
lr->lr_uid, lr->lr_gid); |
|
5833
1a2bbcec3b3f
6649841 zfs_replay_create_acl() has improper indentation
marks
parents:
5435
diff
changeset
|
380 |
} |
5331 | 381 |
error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, |
382 |
&vp, kcred, NULL, vflg, &vsec); |
|
383 |
break; |
|
384 |
default: |
|
385 |
error = ENOTSUP; |
|
386 |
} |
|
387 |
||
388 |
bail: |
|
389 |
if (error == 0 && vp != NULL) |
|
390 |
VN_RELE(vp); |
|
391 |
||
392 |
VN_RELE(ZTOV(dzp)); |
|
393 |
||
394 |
zfs_fuid_info_free(zfsvfs->z_fuid_replay); |
|
395 |
zfsvfs->z_fuid_replay = NULL; |
|
396 |
||
397 |
return (error); |
|
398 |
} |
|
399 |
||
400 |
static int |
|
401 |
zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) |
|
402 |
{ |
|
403 |
char *name = NULL; /* location determined later */ |
|
404 |
char *link; /* symlink content follows name */ |
|
405 |
znode_t *dzp; |
|
406 |
vnode_t *vp = NULL; |
|
407 |
xvattr_t xva; |
|
408 |
int vflg = 0; |
|
409 |
size_t lrsize = sizeof (lr_create_t); |
|
410 |
lr_attr_t *lrattr; |
|
411 |
void *start; |
|
412 |
size_t xvatlen; |
|
413 |
uint64_t txtype; |
|
414 |
int error; |
|
415 |
||
416 |
if (byteswap) { |
|
417 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
418 |
txtype = (int)lr->lr_common.lrc_txtype; |
|
419 |
if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) |
|
420 |
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); |
|
421 |
} |
|
422 |
||
423 |
||
424 |
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
|
425 |
return (error); |
|
426 |
||
427 |
xva_init(&xva); |
|
428 |
zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, |
|
429 |
lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); |
|
430 |
||
431 |
/* |
|
432 |
* All forms of zfs create (create, mkdir, mkxattrdir, symlink) |
|
433 |
* eventually end up in zfs_mknode(), which assigns the object's |
|
434 |
* creation time and generation number. The generic VOP_CREATE() |
|
435 |
* doesn't have either concept, so we smuggle the values inside |
|
436 |
* the vattr's otherwise unused va_ctime and va_nblocks fields. |
|
437 |
*/ |
|
438 |
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); |
|
439 |
xva.xva_vattr.va_nblocks = lr->lr_gen; |
|
789 | 440 |
|
4480
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
2638
diff
changeset
|
441 |
error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
2638
diff
changeset
|
442 |
if (error != ENOENT) |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
2638
diff
changeset
|
443 |
goto out; |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
2638
diff
changeset
|
444 |
|
5331 | 445 |
if (lr->lr_common.lrc_txtype & TX_CI) |
446 |
vflg |= FIGNORECASE; |
|
447 |
||
448 |
/* |
|
449 |
* Symlinks don't have fuid info, and CIFS never creates |
|
450 |
* symlinks. |
|
451 |
* |
|
452 |
* The _ATTR versions will grab the fuid info in their subcases. |
|
453 |
*/ |
|
454 |
if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK && |
|
455 |
(int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && |
|
456 |
(int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { |
|
457 |
start = (lr + 1); |
|
458 |
zfsvfs->z_fuid_replay = |
|
459 |
zfs_replay_fuid_domain(start, &start, |
|
460 |
lr->lr_uid, lr->lr_gid); |
|
461 |
} |
|
462 |
||
789 | 463 |
switch ((int)lr->lr_common.lrc_txtype) { |
5331 | 464 |
case TX_CREATE_ATTR: |
465 |
lrattr = (lr_attr_t *)(caddr_t)(lr + 1); |
|
466 |
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); |
|
467 |
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); |
|
468 |
start = (caddr_t)(lr + 1) + xvatlen; |
|
469 |
zfsvfs->z_fuid_replay = |
|
470 |
zfs_replay_fuid_domain(start, &start, |
|
471 |
lr->lr_uid, lr->lr_gid); |
|
472 |
name = (char *)start; |
|
473 |
||
474 |
/*FALLTHROUGH*/ |
|
789 | 475 |
case TX_CREATE: |
5331 | 476 |
if (name == NULL) |
477 |
name = (char *)start; |
|
478 |
||
479 |
error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, |
|
480 |
0, 0, &vp, kcred, vflg, NULL, NULL); |
|
789 | 481 |
break; |
5331 | 482 |
case TX_MKDIR_ATTR: |
483 |
lrattr = (lr_attr_t *)(caddr_t)(lr + 1); |
|
484 |
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); |
|
485 |
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); |
|
486 |
start = (caddr_t)(lr + 1) + xvatlen; |
|
487 |
zfsvfs->z_fuid_replay = |
|
488 |
zfs_replay_fuid_domain(start, &start, |
|
489 |
lr->lr_uid, lr->lr_gid); |
|
490 |
name = (char *)start; |
|
491 |
||
492 |
/*FALLTHROUGH*/ |
|
789 | 493 |
case TX_MKDIR: |
5331 | 494 |
if (name == NULL) |
495 |
name = (char *)(lr + 1); |
|
496 |
||
497 |
error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, |
|
498 |
&vp, kcred, NULL, vflg, NULL); |
|
789 | 499 |
break; |
500 |
case TX_MKXATTR: |
|
5331 | 501 |
error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred); |
789 | 502 |
break; |
503 |
case TX_SYMLINK: |
|
5331 | 504 |
name = (char *)(lr + 1); |
789 | 505 |
link = name + strlen(name) + 1; |
5331 | 506 |
error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr, |
507 |
link, kcred, NULL, vflg); |
|
789 | 508 |
break; |
509 |
default: |
|
510 |
error = ENOTSUP; |
|
511 |
} |
|
512 |
||
4480
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
2638
diff
changeset
|
513 |
out: |
789 | 514 |
if (error == 0 && vp != NULL) |
515 |
VN_RELE(vp); |
|
516 |
||
517 |
VN_RELE(ZTOV(dzp)); |
|
518 |
||
5331 | 519 |
if (zfsvfs->z_fuid_replay) |
520 |
zfs_fuid_info_free(zfsvfs->z_fuid_replay); |
|
521 |
zfsvfs->z_fuid_replay = NULL; |
|
789 | 522 |
return (error); |
523 |
} |
|
524 |
||
525 |
static int |
|
526 |
zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) |
|
527 |
{ |
|
528 |
char *name = (char *)(lr + 1); /* name follows lr_remove_t */ |
|
529 |
znode_t *dzp; |
|
530 |
int error; |
|
5331 | 531 |
int vflg = 0; |
789 | 532 |
|
533 |
if (byteswap) |
|
534 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
535 |
||
536 |
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
|
537 |
return (error); |
|
538 |
||
5331 | 539 |
if (lr->lr_common.lrc_txtype & TX_CI) |
540 |
vflg |= FIGNORECASE; |
|
541 |
||
789 | 542 |
switch ((int)lr->lr_common.lrc_txtype) { |
543 |
case TX_REMOVE: |
|
5331 | 544 |
error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg); |
789 | 545 |
break; |
546 |
case TX_RMDIR: |
|
5331 | 547 |
error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg); |
789 | 548 |
break; |
549 |
default: |
|
550 |
error = ENOTSUP; |
|
551 |
} |
|
552 |
||
553 |
VN_RELE(ZTOV(dzp)); |
|
554 |
||
555 |
return (error); |
|
556 |
} |
|
557 |
||
558 |
static int |
|
559 |
zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap) |
|
560 |
{ |
|
561 |
char *name = (char *)(lr + 1); /* name follows lr_link_t */ |
|
562 |
znode_t *dzp, *zp; |
|
563 |
int error; |
|
5331 | 564 |
int vflg = 0; |
789 | 565 |
|
566 |
if (byteswap) |
|
567 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
568 |
||
569 |
if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) |
|
570 |
return (error); |
|
571 |
||
572 |
if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { |
|
573 |
VN_RELE(ZTOV(dzp)); |
|
574 |
return (error); |
|
575 |
} |
|
576 |
||
5331 | 577 |
if (lr->lr_common.lrc_txtype & TX_CI) |
578 |
vflg |= FIGNORECASE; |
|
579 |
||
580 |
error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg); |
|
789 | 581 |
|
582 |
VN_RELE(ZTOV(zp)); |
|
583 |
VN_RELE(ZTOV(dzp)); |
|
584 |
||
585 |
return (error); |
|
586 |
} |
|
587 |
||
588 |
static int |
|
589 |
zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) |
|
590 |
{ |
|
591 |
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ |
|
592 |
char *tname = sname + strlen(sname) + 1; |
|
593 |
znode_t *sdzp, *tdzp; |
|
594 |
int error; |
|
5331 | 595 |
int vflg = 0; |
789 | 596 |
|
597 |
if (byteswap) |
|
598 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
599 |
||
600 |
if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) |
|
601 |
return (error); |
|
602 |
||
603 |
if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { |
|
604 |
VN_RELE(ZTOV(sdzp)); |
|
605 |
return (error); |
|
606 |
} |
|
607 |
||
5331 | 608 |
if (lr->lr_common.lrc_txtype & TX_CI) |
609 |
vflg |= FIGNORECASE; |
|
610 |
||
611 |
error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred, |
|
612 |
NULL, vflg); |
|
789 | 613 |
|
614 |
VN_RELE(ZTOV(tdzp)); |
|
615 |
VN_RELE(ZTOV(sdzp)); |
|
616 |
||
617 |
return (error); |
|
618 |
} |
|
619 |
||
620 |
static int |
|
621 |
zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) |
|
622 |
{ |
|
623 |
char *data = (char *)(lr + 1); /* data follows lr_write_t */ |
|
624 |
znode_t *zp; |
|
625 |
int error; |
|
626 |
ssize_t resid; |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
627 |
uint64_t orig_eof, eod, offset, length; |
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
628 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
629 |
if (byteswap) |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
630 |
byteswap_uint64_array(lr, sizeof (*lr)); |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
631 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
632 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
633 |
/* |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
634 |
* As we can log writes out of order, it's possible the |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
635 |
* file has been removed. In this case just drop the write |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
636 |
* and return success. |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
637 |
*/ |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
638 |
if (error == ENOENT) |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
639 |
error = 0; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
640 |
return (error); |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
641 |
} |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
642 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
643 |
offset = lr->lr_offset; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
644 |
length = lr->lr_length; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
645 |
eod = offset + length; /* end of data for this write */ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
646 |
|
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
647 |
orig_eof = zp->z_phys->zp_size; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
648 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
649 |
/* If it's a dmu_sync() block, write the whole block */ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
650 |
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
651 |
uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
652 |
if (length < blocksize) { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
653 |
offset -= offset % blocksize; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
654 |
length = blocksize; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
655 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
656 |
} |
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
657 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
658 |
error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
659 |
UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); |
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
660 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
661 |
/* |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
662 |
* This may be a write from a dmu_sync() for a whole block, |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
663 |
* and may extend beyond the current end of the file. |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
664 |
* We can't just replay what was written for this TX_WRITE as |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
665 |
* a future TX_WRITE2 may extend the eof and the data for that |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
666 |
* write needs to be there. So we write the whole block and |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
667 |
* reduce the eof. |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
668 |
*/ |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
669 |
if (orig_eof < zp->z_phys->zp_size) /* file length grew ? */ |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
670 |
zp->z_phys->zp_size = eod; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
671 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
672 |
VN_RELE(ZTOV(zp)); |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
673 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
674 |
return (error); |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
675 |
} |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
676 |
|
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
677 |
/* |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
678 |
* TX_WRITE2 are only generated when dmu_sync() returns EALREADY |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
679 |
* meaning the pool block is already being synced. So now that we always write |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
680 |
* out full blocks, all we have to do is expand the eof if |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
681 |
* the file is grown. |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
682 |
*/ |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
683 |
static int |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
684 |
zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
685 |
{ |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
686 |
znode_t *zp; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
687 |
int error; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
688 |
uint64_t end; |
789 | 689 |
|
690 |
if (byteswap) |
|
691 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
692 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
693 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
789 | 694 |
return (error); |
695 |
||
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
696 |
end = lr->lr_offset + lr->lr_length; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
697 |
if (end > zp->z_phys->zp_size) { |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
698 |
ASSERT3U(end - zp->z_phys->zp_size, <, zp->z_blksz); |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
699 |
zp->z_phys->zp_size = end; |
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
700 |
} |
789 | 701 |
|
702 |
VN_RELE(ZTOV(zp)); |
|
703 |
||
704 |
return (error); |
|
705 |
} |
|
706 |
||
707 |
static int |
|
708 |
zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) |
|
709 |
{ |
|
710 |
znode_t *zp; |
|
711 |
flock64_t fl; |
|
712 |
int error; |
|
713 |
||
714 |
if (byteswap) |
|
715 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
716 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
717 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
789 | 718 |
return (error); |
719 |
||
720 |
bzero(&fl, sizeof (fl)); |
|
721 |
fl.l_type = F_WRLCK; |
|
722 |
fl.l_whence = 0; |
|
723 |
fl.l_start = lr->lr_offset; |
|
724 |
fl.l_len = lr->lr_length; |
|
725 |
||
726 |
error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX, |
|
727 |
lr->lr_offset, kcred, NULL); |
|
728 |
||
729 |
VN_RELE(ZTOV(zp)); |
|
730 |
||
731 |
return (error); |
|
732 |
} |
|
733 |
||
734 |
static int |
|
735 |
zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) |
|
736 |
{ |
|
737 |
znode_t *zp; |
|
5331 | 738 |
xvattr_t xva; |
739 |
vattr_t *vap = &xva.xva_vattr; |
|
789 | 740 |
int error; |
5331 | 741 |
void *start; |
789 | 742 |
|
5331 | 743 |
xva_init(&xva); |
744 |
if (byteswap) { |
|
789 | 745 |
byteswap_uint64_array(lr, sizeof (*lr)); |
746 |
||
5331 | 747 |
if ((lr->lr_mask & AT_XVATTR) && |
748 |
zfsvfs->z_version >= ZPL_VERSION_INITIAL) |
|
749 |
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); |
|
750 |
} |
|
751 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
752 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
789 | 753 |
return (error); |
754 |
||
5331 | 755 |
zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, |
789 | 756 |
lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); |
757 |
||
5331 | 758 |
vap->va_size = lr->lr_size; |
759 |
ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); |
|
760 |
ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); |
|
761 |
||
762 |
/* |
|
763 |
* Fill in xvattr_t portions if necessary. |
|
764 |
*/ |
|
789 | 765 |
|
5331 | 766 |
start = (lr_setattr_t *)(lr + 1); |
767 |
if (vap->va_mask & AT_XVATTR) { |
|
768 |
zfs_replay_xvattr((lr_attr_t *)start, &xva); |
|
769 |
start = (caddr_t)start + |
|
770 |
ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); |
|
771 |
} else |
|
772 |
xva.xva_vattr.va_mask &= ~AT_XVATTR; |
|
789 | 773 |
|
5331 | 774 |
zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, |
775 |
lr->lr_uid, lr->lr_gid); |
|
776 |
||
777 |
error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL); |
|
778 |
||
779 |
zfs_fuid_info_free(zfsvfs->z_fuid_replay); |
|
780 |
zfsvfs->z_fuid_replay = NULL; |
|
789 | 781 |
VN_RELE(ZTOV(zp)); |
782 |
||
783 |
return (error); |
|
784 |
} |
|
785 |
||
786 |
static int |
|
5331 | 787 |
zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) |
789 | 788 |
{ |
789 |
ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ |
|
790 |
vsecattr_t vsa; |
|
791 |
znode_t *zp; |
|
792 |
int error; |
|
793 |
||
5435 | 794 |
if (byteswap) { |
795 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
796 |
zfs_oldace_byteswap(ace, lr->lr_aclcnt); |
|
797 |
} |
|
798 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
799 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
789 | 800 |
return (error); |
801 |
||
802 |
bzero(&vsa, sizeof (vsa)); |
|
803 |
vsa.vsa_mask = VSA_ACE | VSA_ACECNT; |
|
804 |
vsa.vsa_aclcnt = lr->lr_aclcnt; |
|
6514
852c82a1989c
6689561 zfs_replay_acl_v0() doesn't completely initialize vsecattr
marks
parents:
5833
diff
changeset
|
805 |
vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt; |
852c82a1989c
6689561 zfs_replay_acl_v0() doesn't completely initialize vsecattr
marks
parents:
5833
diff
changeset
|
806 |
vsa.vsa_aclflags = 0; |
789 | 807 |
vsa.vsa_aclentp = ace; |
808 |
||
5331 | 809 |
error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); |
810 |
||
811 |
VN_RELE(ZTOV(zp)); |
|
812 |
||
813 |
return (error); |
|
814 |
} |
|
815 |
||
816 |
/* |
|
817 |
* Replaying ACLs is complicated by FUID support. |
|
818 |
* The log record may contain some optional data |
|
819 |
* to be used for replaying FUID's. These pieces |
|
820 |
* are the actual FUIDs that were created initially. |
|
821 |
* The FUID table index may no longer be valid and |
|
822 |
* during zfs_create() a new index may be assigned. |
|
823 |
* Because of this the log will contain the original |
|
824 |
* domain+rid in order to create a new FUID. |
|
825 |
* |
|
826 |
* The individual ACEs may contain an ephemeral uid/gid which is no |
|
827 |
* longer valid and will need to be replaced with an actual FUID. |
|
828 |
* |
|
829 |
*/ |
|
830 |
static int |
|
831 |
zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) |
|
832 |
{ |
|
833 |
ace_t *ace = (ace_t *)(lr + 1); |
|
834 |
vsecattr_t vsa; |
|
835 |
znode_t *zp; |
|
836 |
int error; |
|
789 | 837 |
|
5435 | 838 |
if (byteswap) { |
839 |
byteswap_uint64_array(lr, sizeof (*lr)); |
|
840 |
zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); |
|
841 |
if (lr->lr_fuidcnt) { |
|
842 |
byteswap_uint64_array((caddr_t)ace + |
|
843 |
ZIL_ACE_LENGTH(lr->lr_acl_bytes), |
|
844 |
lr->lr_fuidcnt * sizeof (uint64_t)); |
|
845 |
} |
|
846 |
} |
|
847 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10800
diff
changeset
|
848 |
if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) |
5331 | 849 |
return (error); |
850 |
||
851 |
bzero(&vsa, sizeof (vsa)); |
|
852 |
vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; |
|
853 |
vsa.vsa_aclcnt = lr->lr_aclcnt; |
|
854 |
vsa.vsa_aclentp = ace; |
|
855 |
vsa.vsa_aclentsz = lr->lr_acl_bytes; |
|
856 |
vsa.vsa_aclflags = lr->lr_acl_flags; |
|
857 |
||
858 |
if (lr->lr_fuidcnt) { |
|
5435 | 859 |
void *fuidstart = (caddr_t)ace + |
860 |
ZIL_ACE_LENGTH(lr->lr_acl_bytes); |
|
5331 | 861 |
|
862 |
zfsvfs->z_fuid_replay = |
|
863 |
zfs_replay_fuids(fuidstart, &fuidstart, |
|
864 |
lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); |
|
865 |
} |
|
866 |
||
867 |
error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); |
|
868 |
||
869 |
if (zfsvfs->z_fuid_replay) |
|
870 |
zfs_fuid_info_free(zfsvfs->z_fuid_replay); |
|
871 |
||
872 |
zfsvfs->z_fuid_replay = NULL; |
|
789 | 873 |
VN_RELE(ZTOV(zp)); |
874 |
||
875 |
return (error); |
|
876 |
} |
|
877 |
||
878 |
/* |
|
879 |
* Callback vectors for replaying records |
|
880 |
*/ |
|
881 |
zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { |
|
882 |
zfs_replay_error, /* 0 no such transaction type */ |
|
883 |
zfs_replay_create, /* TX_CREATE */ |
|
884 |
zfs_replay_create, /* TX_MKDIR */ |
|
885 |
zfs_replay_create, /* TX_MKXATTR */ |
|
886 |
zfs_replay_create, /* TX_SYMLINK */ |
|
887 |
zfs_replay_remove, /* TX_REMOVE */ |
|
888 |
zfs_replay_remove, /* TX_RMDIR */ |
|
889 |
zfs_replay_link, /* TX_LINK */ |
|
890 |
zfs_replay_rename, /* TX_RENAME */ |
|
891 |
zfs_replay_write, /* TX_WRITE */ |
|
892 |
zfs_replay_truncate, /* TX_TRUNCATE */ |
|
893 |
zfs_replay_setattr, /* TX_SETATTR */ |
|
5331 | 894 |
zfs_replay_acl_v0, /* TX_ACL_V0 */ |
789 | 895 |
zfs_replay_acl, /* TX_ACL */ |
5331 | 896 |
zfs_replay_create_acl, /* TX_CREATE_ACL */ |
897 |
zfs_replay_create, /* TX_CREATE_ATTR */ |
|
898 |
zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ |
|
899 |
zfs_replay_create_acl, /* TX_MKDIR_ACL */ |
|
900 |
zfs_replay_create, /* TX_MKDIR_ATTR */ |
|
901 |
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ |
|
10800
469478b180d9
6880764 fsync on zfs is broken if writes are greater than 32kb on a hard crash and no log attached
Neil Perrin <Neil.Perrin@Sun.COM>
parents:
10793
diff
changeset
|
902 |
zfs_replay_write2, /* TX_WRITE2 */ |
789 | 903 |
}; |