author | billm |
Mon, 12 Feb 2007 17:35:21 -0800 | |
changeset 3638 | 6b28ebc717aa |
parent 3461 | c19b22f347d6 |
child 3897 | 278bade789ba |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
3444
dc160a70a50d
6410433 'zpool status -v' would be more useful with filenames
ek110237
parents:
3063
diff
changeset
|
22 |
* Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#ifndef _SYS_FS_ZFS_ZNODE_H |
|
27 |
#define _SYS_FS_ZFS_ZNODE_H |
|
28 |
||
29 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
30 |
||
31 |
#ifdef _KERNEL |
|
32 |
#include <sys/isa_defs.h> |
|
33 |
#include <sys/types32.h> |
|
34 |
#include <sys/list.h> |
|
35 |
#include <sys/dmu.h> |
|
36 |
#include <sys/zfs_vfsops.h> |
|
37 |
#endif |
|
38 |
#include <sys/zfs_acl.h> |
|
39 |
#include <sys/zil.h> |
|
40 |
||
41 |
#ifdef __cplusplus |
|
42 |
extern "C" { |
|
43 |
#endif |
|
44 |
||
45 |
/* |
|
46 |
* Define special zfs pflags |
|
47 |
*/ |
|
48 |
#define ZFS_XATTR 0x1 /* is an extended attribute */ |
|
49 |
#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ |
|
905
920e9b2e0899
6347134 zfs_zaccess() is killing ZFS stat() performance
marks
parents:
869
diff
changeset
|
50 |
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ |
789 | 51 |
|
52 |
#define MASTER_NODE_OBJ 1 |
|
53 |
||
54 |
/* |
|
55 |
* special attributes for master node. |
|
56 |
*/ |
|
57 |
||
58 |
#define ZFS_FSID "FSID" |
|
3461 | 59 |
#define ZFS_UNLINKED_SET "DELETE_QUEUE" |
789 | 60 |
#define ZFS_ROOT_OBJ "ROOT" |
1760 | 61 |
#define ZPL_VERSION_OBJ "VERSION" |
789 | 62 |
#define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE" |
63 |
#define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS" |
|
64 |
||
65 |
#define ZFS_FLAG_BLOCKPERPAGE 0x1 |
|
66 |
#define ZFS_FLAG_NOGROWBLOCKS 0x2 |
|
67 |
||
68 |
/* |
|
1760 | 69 |
* ZPL version - rev'd whenever an incompatible on-disk format change |
789 | 70 |
* occurs. Independent of SPA/DMU/ZAP versioning. |
71 |
*/ |
|
72 |
||
1760 | 73 |
#define ZPL_VERSION 1ULL |
789 | 74 |
|
75 |
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE) |
|
76 |
||
77 |
/* Path component length */ |
|
78 |
/* |
|
79 |
* The generic fs code uses MAXNAMELEN to represent |
|
80 |
* what the largest component length is. Unfortunately, |
|
81 |
* this length includes the terminating NUL. ZFS needs |
|
82 |
* to tell the users via pathconf() and statvfs() what the |
|
83 |
* true maximum length of a component is, excluding the NUL. |
|
84 |
*/ |
|
85 |
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1) |
|
86 |
||
87 |
/* |
|
88 |
* This is the persistent portion of the znode. It is stored |
|
89 |
* in the "bonus buffer" of the file. Short symbolic links |
|
90 |
* are also stored in the bonus buffer. |
|
91 |
*/ |
|
92 |
typedef struct znode_phys { |
|
93 |
uint64_t zp_atime[2]; /* 0 - last file access time */ |
|
94 |
uint64_t zp_mtime[2]; /* 16 - last file modification time */ |
|
95 |
uint64_t zp_ctime[2]; /* 32 - last file change time */ |
|
96 |
uint64_t zp_crtime[2]; /* 48 - creation time */ |
|
97 |
uint64_t zp_gen; /* 64 - generation (txg of creation) */ |
|
98 |
uint64_t zp_mode; /* 72 - file mode bits */ |
|
99 |
uint64_t zp_size; /* 80 - size of file */ |
|
100 |
uint64_t zp_parent; /* 88 - directory parent (`..') */ |
|
101 |
uint64_t zp_links; /* 96 - number of links to file */ |
|
102 |
uint64_t zp_xattr; /* 104 - DMU object for xattrs */ |
|
103 |
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ |
|
104 |
uint64_t zp_flags; /* 120 - persistent flags */ |
|
105 |
uint64_t zp_uid; /* 128 - file owner */ |
|
106 |
uint64_t zp_gid; /* 136 - owning group */ |
|
107 |
uint64_t zp_pad[4]; /* 144 - future */ |
|
108 |
zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */ |
|
109 |
/* |
|
110 |
* Data may pad out any remaining bytes in the znode buffer, eg: |
|
111 |
* |
|
112 |
* |<---------------------- dnode_phys (512) ------------------------>| |
|
113 |
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| |
|
114 |
* |<---- znode (264) ---->|<---- data (56) ---->| |
|
115 |
* |
|
116 |
* At present, we only use this space to store symbolic links. |
|
117 |
*/ |
|
118 |
} znode_phys_t; |
|
119 |
||
120 |
/* |
|
121 |
* Directory entry locks control access to directory entries. |
|
122 |
* They are used to protect creates, deletes, and renames. |
|
123 |
* Each directory znode has a mutex and a list of locked names. |
|
124 |
*/ |
|
125 |
#ifdef _KERNEL |
|
126 |
typedef struct zfs_dirlock { |
|
127 |
char *dl_name; /* directory entry being locked */ |
|
128 |
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */ |
|
129 |
uint16_t dl_namesize; /* set if dl_name was allocated */ |
|
130 |
kcondvar_t dl_cv; /* wait for entry to be unlocked */ |
|
131 |
struct znode *dl_dzp; /* directory znode */ |
|
132 |
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */ |
|
133 |
} zfs_dirlock_t; |
|
134 |
||
135 |
typedef struct znode { |
|
136 |
struct zfsvfs *z_zfsvfs; |
|
137 |
vnode_t *z_vnode; |
|
138 |
uint64_t z_id; /* object ID for this znode */ |
|
139 |
kmutex_t z_lock; /* znode modification lock */ |
|
140 |
krwlock_t z_map_lock; /* page map lock */ |
|
1669 | 141 |
krwlock_t z_parent_lock; /* parent lock for directories */ |
789 | 142 |
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ |
1669 | 143 |
kmutex_t z_range_lock; /* protects changes to z_range_avl */ |
144 |
avl_tree_t z_range_avl; /* avl tree of file range locks */ |
|
3461 | 145 |
uint8_t z_unlinked; /* file has been unlinked */ |
789 | 146 |
uint8_t z_atime_dirty; /* atime needs to be synced */ |
147 |
uint8_t z_dbuf_held; /* Is z_dbuf already held? */ |
|
869
dc133b87dfb3
6297285 znode prefetching in zfs_readdir causes 5x performance degradation for 'ls'
perrin
parents:
789
diff
changeset
|
148 |
uint8_t z_zn_prefetch; /* Prefetch znodes? */ |
789 | 149 |
uint_t z_blksz; /* block size in bytes */ |
150 |
uint_t z_seq; /* modification sequence number */ |
|
1544 | 151 |
uint64_t z_mapcnt; /* number of pages mapped to file */ |
789 | 152 |
uint64_t z_last_itx; /* last ZIL itx on this znode */ |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2638
diff
changeset
|
153 |
uint32_t z_sync_cnt; /* synchronous open count */ |
789 | 154 |
kmutex_t z_acl_lock; /* acl data lock */ |
155 |
list_node_t z_link_node; /* all znodes in fs link */ |
|
156 |
/* |
|
157 |
* These are dmu managed fields. |
|
158 |
*/ |
|
159 |
znode_phys_t *z_phys; /* pointer to persistent znode */ |
|
160 |
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */ |
|
161 |
} znode_t; |
|
162 |
||
1669 | 163 |
|
789 | 164 |
/* |
1669 | 165 |
* Range locking rules |
166 |
* -------------------- |
|
167 |
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole |
|
168 |
* file range needs to be locked as RL_WRITER. Only then can the pages be |
|
169 |
* freed etc and zp_size reset. zp_size must be set within range lock. |
|
170 |
* 2. For writes and punching holes (zfs_write & zfs_space) just the range |
|
171 |
* being written or freed needs to be locked as RL_WRITER. |
|
172 |
* Multiple writes at the end of the file must coordinate zp_size updates |
|
173 |
* to ensure data isn't lost. A compare and swap loop is currently used |
|
174 |
* to ensure the file size is at least the offset last written. |
|
175 |
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being |
|
176 |
* read needs to be locked as RL_READER. A check against zp_size can then |
|
177 |
* be made for reading beyond end of file. |
|
789 | 178 |
*/ |
179 |
||
180 |
/* |
|
181 |
* Convert between znode pointers and vnode pointers |
|
182 |
*/ |
|
183 |
#define ZTOV(ZP) ((ZP)->z_vnode) |
|
184 |
#define VTOZ(VP) ((znode_t *)(VP)->v_data) |
|
185 |
||
186 |
/* |
|
187 |
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation. |
|
188 |
* ZFS_EXIT() must be called before exiting the vop. |
|
189 |
*/ |
|
190 |
/*
 * ZFS_ENTER(zfsvfs) increments zfsvfs->z_op_cnt and then tests
 * zfsvfs->z_unmounted1; when that flag is set it undoes the increment
 * with ZFS_EXIT() and makes the *calling function* return EIO.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: "ZFS_ENTER(zfsvfs);" is safe as the body of an
 * unbraced if/else.  Callers must supply the trailing semicolon.
 *
 * ZFS_EXIT(zfsvfs) decrements zfsvfs->z_op_cnt.
 */
#define	ZFS_ENTER(zfsvfs) \
	do { \
		atomic_add_32(&(zfsvfs)->z_op_cnt, 1); \
		if ((zfsvfs)->z_unmounted1) { \
			ZFS_EXIT(zfsvfs); \
			return (EIO); \
		} \
	} while (0)
#define	ZFS_EXIT(zfsvfs)	atomic_add_32(&(zfsvfs)->z_op_cnt, -1)
|
199 |
||
200 |
/* |
|
201 |
* Macros for dealing with dmu_buf_hold |
|
202 |
*/ |
|
203 |
/*
 * Map an object number onto an index below ZFS_OBJ_MTX_SZ by masking with
 * (ZFS_OBJ_MTX_SZ - 1).  The argument is parenthesized so that expressions
 * using operators of lower precedence than '&' (e.g. "a | b") are masked
 * as a whole.
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
|
204 |
/*
 * Address of the z_hold_mtx[] slot, in zp's zfsvfs, selected by hashing
 * zp->z_id.  zp is parenthesized so any pointer-valued expression may be
 * passed.
 */
#define	ZFS_OBJ_MUTEX(zp) \
	(&(zp)->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH((zp)->z_id)])
|
206 |
/*
 * Enter / exit the z_hold_mtx[] slot of zfsvfs selected by hashing obj_num.
 * Both macros expand to a single expression without a trailing semicolon,
 * so the caller writes "ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);" and the pair can
 * be used as the body of an unbraced if/else.
 */
#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])

#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
|
211 |
||
212 |
/* |
|
213 |
* Macros to encode/decode ZFS stored time values from/to struct timespec |
|
214 |
*/ |
|
215 |
/*
 * Store a timespec into a two-element uint64_t array:
 *   stmp[0] <- tp->tv_sec, stmp[1] <- tp->tv_nsec.
 * tp is a pointer to the timespec; stmp is the destination array (any
 * pointer expression is accepted).  Expands to a single statement; the
 * caller supplies the trailing semicolon.
 */
#define	ZFS_TIME_ENCODE(tp, stmp) \
	do { \
		(stmp)[0] = (uint64_t)(tp)->tv_sec; \
		(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
	} while (0)
|
220 |
||
221 |
/*
 * Load a timespec from a two-element uint64_t array:
 *   tp->tv_sec <- (time_t)stmp[0], tp->tv_nsec <- (long)stmp[1].
 * tp is a pointer to the destination timespec; stmp is the source array
 * (any pointer expression is accepted).  Expands to a single statement;
 * the caller supplies the trailing semicolon.
 */
#define	ZFS_TIME_DECODE(tp, stmp) \
	do { \
		(tp)->tv_sec = (time_t)(stmp)[0]; \
		(tp)->tv_nsec = (long)(stmp)[1]; \
	} while (0)
|
226 |
||
227 |
/* |
|
228 |
* Timestamp defines |
|
229 |
*/ |
|
230 |
#define ACCESSED (AT_ATIME) |
|
231 |
#define STATE_CHANGED (AT_CTIME) |
|
232 |
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME) |
|
233 |
||
234 |
/*
 * Call zfs_time_stamper(zp, ACCESSED, NULL) when zfsvfs->z_atime is set
 * and the underlying vfs does not have VFS_RDONLY in vfs_flag.
 *
 * Wrapped in do { } while (0) so the embedded "if" cannot capture an
 * "else" that follows the macro at the call site.  The caller supplies
 * the trailing semicolon, exactly as before.
 */
#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
	do { \
		if ((zfsvfs)->z_atime && \
		    !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
			zfs_time_stamper(zp, ACCESSED, NULL); \
	} while (0)
|
237 |
||
238 |
extern int zfs_init_fs(zfsvfs_t *, znode_t **, cred_t *); |
|
239 |
extern void zfs_set_dataprop(objset_t *); |
|
240 |
extern void zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx); |
|
241 |
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *); |
|
242 |
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *); |
|
1669 | 243 |
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *); |
1878
c22df0f5603f
6413573 deadlock between fsflush() and zfs_create()
maybee
parents:
1816
diff
changeset
|
244 |
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); |
789 | 245 |
extern void zfs_znode_init(void); |
246 |
extern void zfs_znode_fini(void); |
|
247 |
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **); |
|
248 |
extern void zfs_zinactive(znode_t *); |
|
249 |
extern void zfs_znode_delete(znode_t *, dmu_tx_t *); |
|
250 |
extern void zfs_znode_free(znode_t *); |
|
251 |
/* Takes no arguments; (void) makes this a real prototype in C. */
extern void zfs_remove_op_tables(void);
|
252 |
/* Takes no arguments; (void) makes this a real prototype in C. */
extern int zfs_create_op_tables(void);
|
253 |
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr); |
|
1816
8c14b56c8515
6408482 64-bit system can't read some 32-bit dev_ts created on zfs
marks
parents:
1760
diff
changeset
|
254 |
extern dev_t zfs_cmpldev(uint64_t); |
789 | 255 |
|
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
256 |
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 257 |
znode_t *dzp, znode_t *zp, char *name); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
258 |
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 259 |
znode_t *dzp, char *name); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
260 |
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 261 |
znode_t *dzp, znode_t *zp, char *name); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
262 |
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 263 |
znode_t *dzp, znode_t *zp, char *name, char *link); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
264 |
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 265 |
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
266 |
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
3638
6b28ebc717aa
6496357 spec_fsync() is useless on devices that do write caching
billm
parents:
3461
diff
changeset
|
267 |
znode_t *zp, offset_t off, ssize_t len, int ioflag); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
268 |
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 269 |
znode_t *zp, uint64_t off, uint64_t len); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
270 |
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 271 |
znode_t *zp, vattr_t *vap, uint_t mask_applied); |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2597
diff
changeset
|
272 |
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype, |
789 | 273 |
znode_t *zp, int aclcnt, ace_t *z_ace); |
274 |
||
275 |
extern zil_get_data_t zfs_get_data; |
|
276 |
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; |
|
277 |
extern int zfsfstype; |
|
278 |
||
279 |
#endif /* _KERNEL */ |
|
280 |
||
3444
dc160a70a50d
6410433 'zpool status -v' would be more useful with filenames
ek110237
parents:
3063
diff
changeset
|
281 |
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); |
dc160a70a50d
6410433 'zpool status -v' would be more useful with filenames
ek110237
parents:
3063
diff
changeset
|
282 |
|
789 | 283 |
#ifdef __cplusplus |
284 |
} |
|
285 |
#endif |
|
286 |
||
287 |
#endif /* _SYS_FS_ZFS_ZNODE_H */ |