author | eschrock |
Fri, 03 Mar 2006 20:08:16 -0800 | |
changeset 1544 | 938876158511 |
parent 896 | f5270e6bd04d |
child 2391 | 2fa3fd1db808 |
permissions | -rw-r--r-- |
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
|
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
|
/*
 * Include guard for this header.  The matching #endif is the last
 * directive in the file.
 */
#ifndef	_SYS_DBUF_H
#define	_SYS_DBUF_H

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>

/* Give the declarations below C linkage when compiled as C++. */
#ifdef	__cplusplus
extern "C" {
#endif
|
/*
 * Sentinel values.
 *
 * DB_BONUS_BLKID is -1 converted to unsigned long long, i.e. the
 * all-ones block id.
 * IN_DMU_SYNC is -1 cast to a blkptr_t pointer, so it can be told
 * apart from both NULL and any real pointer by comparison; it must
 * never be dereferenced.
 */
#define	DB_BONUS_BLKID (-1ULL)
#define	IN_DMU_SYNC ((blkptr_t *)-1)
|
/*
 * define flags for dbuf_read
 *
 * DB_RF_MUST_SUCCEED is 0, so it sets no bit and cannot be tested
 * with a bitwise AND; it is simply the absence of the other flags.
 * The remaining flags occupy bits 1 through 4 (bit 0 is unused).
 */

#define	DB_RF_MUST_SUCCEED	0
#define	DB_RF_CANFAIL		(1 << 1)
#define	DB_RF_HAVESTRUCT	(1 << 2)
#define	DB_RF_NOPREFETCH	(1 << 3)
#define	DB_RF_NEVERWAIT		(1 << 4)
|
/*
 * The state transition diagram for dbufs looks like:
 *
 *               +----> READ ----+
 *               |               |
 *               |               V
 *  (alloc)-->UNCACHED       CACHED-->EVICTING-->(free)
 *               |               ^
 *               |               |
 *               +----> FILL ----+
 *
 * The enumerators carry no explicit initializers, so they are numbered
 * from 0 in declaration order (DB_UNCACHED == 0 ... DB_EVICTING == 4).
 */
typedef enum dbuf_states {
	DB_UNCACHED,
	DB_FILL,
	DB_READ,
	DB_CACHED,
	DB_EVICTING
} dbuf_states_t;
/*
 * Forward declarations: these structures are only referred to through
 * pointers in this header, so their definitions are not needed here.
 */
struct objset_impl;
struct dnode;
struct dmu_tx;
|
/*
 * level = 0 means the user data
 * level = 1 means the single indirect block
 * etc.
 */

/*
 * Evaluates to nonzero when both the list_next and list_prev members
 * of the object pointed to by `link' are NULL, and to zero otherwise.
 * `link' is evaluated twice, so it must not have side effects.
 */
#define	LIST_LINK_INACTIVE(link) \
	((link)->list_next == NULL && (link)->list_prev == NULL)
|
87 |
||
88 |
typedef struct dmu_buf_impl { |
|
89 |
/* |
|
90 |
* The following members are immutable, with the exception of |
|
91 |
* db.db_data, which is protected by db_mtx. |
|
92 |
*/ |
|
93 |
||
94 |
/* the publicly visible structure */ |
|
95 |
dmu_buf_t db; |
|
96 |
||
97 |
/* the objset we belong to */ |
|
98 |
struct objset_impl *db_objset; |
|
99 |
||
100 |
/* |
|
101 |
* the dnode we belong to (NULL when evicted) |
|
102 |
*/ |
|
103 |
struct dnode *db_dnode; |
|
104 |
||
105 |
/* |
|
106 |
* our parent buffer; if the dnode points to us directly, |
|
107 |
* db_parent == db_dnode->dn_dbuf |
|
108 |
* only accessed by sync thread ??? |
|
109 |
* (NULL when evicted) |
|
110 |
*/ |
|
111 |
struct dmu_buf_impl *db_parent; |
|
112 |
||
113 |
/* |
|
114 |
* link for hash table of all dmu_buf_impl_t's |
|
115 |
*/ |
|
116 |
struct dmu_buf_impl *db_hash_next; |
|
117 |
||
118 |
/* our block number */ |
|
119 |
uint64_t db_blkid; |
|
120 |
||
121 |
/* |
|
122 |
* Pointer to the blkptr_t which points to us. May be NULL if we |
|
123 |
* don't have one yet. (NULL when evicted) |
|
124 |
*/ |
|
125 |
blkptr_t *db_blkptr; |
|
126 |
||
127 |
/* |
|
128 |
* Our indirection level. Data buffers have db_level==0. |
|
129 |
* Indirect buffers which point to data buffers have |
|
130 |
* db_level==1. etc. Buffers which contain dnodes have |
|
131 |
* db_level==0, since the dnodes are stored in a file. |
|
132 |
*/ |
|
133 |
uint8_t db_level; |
|
134 |
||
135 |
/* db_mtx protects the members below */ |
|
136 |
kmutex_t db_mtx; |
|
137 |
||
138 |
/* |
|
139 |
* Current state of the buffer |
|
140 |
*/ |
|
141 |
dbuf_states_t db_state; |
|
142 |
||
143 |
/* |
|
144 |
* Refcount accessed by dmu_buf_{hold,rele}. |
|
145 |
* If nonzero, the buffer can't be destroyed. |
|
146 |
* Protected by db_mtx. |
|
147 |
*/ |
|
148 |
refcount_t db_holds; |
|
149 |
||
150 |
/* buffer holding our data */ |
|
151 |
arc_buf_t *db_buf; |
|
152 |
||
153 |
kcondvar_t db_changed; |
|
154 |
arc_buf_t *db_data_pending; |
|
155 |
||
156 |
/* |
|
157 |
* Last time (transaction group) this buffer was dirtied. |
|
158 |
*/ |
|
159 |
uint64_t db_dirtied; |
|
160 |
||
161 |
/* |
|
1544 | 162 |
* If db_dnode != NULL, our link on the owner dnodes's dn_dbufs list. |
163 |
* Protected by its dn_dbufs_mtx. |
|
789 | 164 |
*/ |
165 |
list_node_t db_link; |
|
166 |
||
167 |
/* Our link on dn_dirty_dbufs[txg] */ |
|
168 |
list_node_t db_dirty_node[TXG_SIZE]; |
|
169 |
uint8_t db_dirtycnt; |
|
170 |
||
171 |
/* |
|
172 |
* Data which is unique to data (leaf) blocks: |
|
173 |
*/ |
|
174 |
struct { |
|
175 |
/* stuff we store for the user (see dmu_buf_set_user) */ |
|
176 |
void *db_user_ptr; |
|
177 |
void **db_user_data_ptr_ptr; |
|
178 |
dmu_buf_evict_func_t *db_evict_func; |
|
179 |
uint8_t db_immediate_evict; |
|
180 |
uint8_t db_freed_in_flight; |
|
181 |
||
182 |
/* |
|
183 |
* db_data_old[txg&TXG_MASK] is set when we |
|
184 |
* dirty the buffer, so that we can retain the |
|
185 |
* pointer even if it gets COW'd in a subsequent |
|
186 |
* transaction group. |
|
187 |
* |
|
188 |
* If the buffer is dirty in any txg, it can't |
|
189 |
* be destroyed. |
|
190 |
*/ |
|
191 |
/* |
|
192 |
* XXX Protected by db_mtx and dn_dirty_mtx. |
|
193 |
* db_mtx must be held to read db_dirty[], and |
|
194 |
* both db_mtx and dn_dirty_mtx must be held to |
|
195 |
* modify (dirty or clean). db_mtx must be held |
|
196 |
* before dn_dirty_mtx. |
|
197 |
*/ |
|
1544 | 198 |
void *db_data_old[TXG_SIZE]; |
789 | 199 |
blkptr_t *db_overridden_by[TXG_SIZE]; |
200 |
} db_d; |
|
201 |
} dmu_buf_impl_t; |
|
/* Note: the dbuf hash table is exposed only for the mdb module */

/*
 * DBUF_MUTEXES is the number of entries in a hash table's hash_mutexes
 * array.  DBUF_HASH_MUTEX(h, idx) yields the address of the entry that
 * idx maps to, reducing idx with a bit mask of (DBUF_MUTEXES - 1); the
 * mask only covers every entry because DBUF_MUTEXES is a power of two.
 */
#define	DBUF_MUTEXES 256
#define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
206 |
typedef struct dbuf_hash_table { |
|
207 |
uint64_t hash_table_mask; |
|
208 |
dmu_buf_impl_t **hash_table; |
|
209 |
kmutex_t hash_mutexes[DBUF_MUTEXES]; |
|
210 |
} dbuf_hash_table_t; |
|
211 |
||
212 |
||
213 |
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); |
|
214 |
||
215 |
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data); |
|
1544 | 216 |
dmu_buf_impl_t *dbuf_create_bonus(struct dnode *dn); |
789 | 217 |
|
1544 | 218 |
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag); |
789 | 219 |
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, |
220 |
void *tag); |
|
221 |
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, |
|
222 |
void *tag, dmu_buf_impl_t **dbp); |
|
223 |
||
224 |
void dbuf_prefetch(struct dnode *dn, uint64_t blkid); |
|
225 |
||
226 |
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); |
|
227 |
uint64_t dbuf_refcount(dmu_buf_impl_t *db); |
|
228 |
||
1544 | 229 |
void dbuf_rele(dmu_buf_impl_t *db, void *tag); |
789 | 230 |
|
231 |
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); |
|
232 |
||
1544 | 233 |
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); |
789 | 234 |
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |
1544 | 235 |
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); |
789 | 236 |
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx); |
237 |
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); |
|
238 |
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); |
|
239 |
void dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |
|
240 |
||
1544 | 241 |
void dbuf_clear(dmu_buf_impl_t *db); |
789 | 242 |
void dbuf_evict(dmu_buf_impl_t *db); |
243 |
||
244 |
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |
|
245 |
void dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx); |
|
246 |
void dbuf_unoverride(dmu_buf_impl_t *db, uint64_t txg); |
|
247 |
||
248 |
void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks, |
|
249 |
struct dmu_tx *); |
|
250 |
||
251 |
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); |
|
252 |
||
253 |
void dbuf_init(void); |
|
254 |
void dbuf_fini(void); |
|
/*
 * Debug helpers.
 *
 * With ZFS_DEBUG defined:
 *   dprintf_dbuf(dbuf, fmt, ...) - when ZFS_DEBUG_DPRINTF is set in
 *       zfs_flags, forwards to dprintf_ds() with the message prefixed
 *       by "obj=<object> lvl=<db_level> blkid=<db_blkid> ".  The object
 *       is printed as "mdn" when it equals DMU_META_DNODE_OBJECT and as
 *       a decimal number otherwise.
 *   dprintf_dbuf_bp(db, bp, fmt, ...) - same, with the block pointer
 *       formatted by sprintf_blkptr() appended as " %s\n".
 *   DBUF_VERIFY(db) - calls dbuf_verify(db).  dbuf_verify() is not
 *       declared in this header, so a declaration must be visible
 *       wherever the macro is used.
 *
 * Without ZFS_DEBUG all three expand to nothing, so their arguments are
 * never evaluated; do not pass expressions with side effects.
 *
 * Both dprintf macros place __VA_ARGS__ next to a comma, so callers
 * must supply at least one argument after fmt.
 */
#ifdef ZFS_DEBUG

/*
 * There should be a ## between the string literal and fmt, to make it
 * clear that we're joining two strings together, but gcc does not
 * support that preprocessor token.
 */
#define	dprintf_dbuf(dbuf, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char __db_buf[32]; \
	uint64_t __db_obj = (dbuf)->db.db_object; \
	if (__db_obj == DMU_META_DNODE_OBJECT) \
		(void) strcpy(__db_buf, "mdn"); \
	else \
		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
		    (u_longlong_t)__db_obj); \
	dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
	    "obj=%s lvl=%u blkid=%lld " fmt, \
	    __db_buf, (dbuf)->db_level, \
	    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	dprintf_dbuf_bp(db, bp, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char __blkbuf[BP_SPRINTF_LEN]; \
	sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
	dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	DBUF_VERIFY(db)	dbuf_verify(db)

#else

#define	dprintf_dbuf(db, fmt, ...)
#define	dprintf_dbuf_bp(db, bp, fmt, ...)
#define	DBUF_VERIFY(db)

#endif
|
/* Close the extern "C" block opened near the top of this header. */
#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_DBUF_H */