author | maybee |
Fri, 11 Nov 2005 19:27:17 -0800 | |
changeset 896 | f5270e6bd04d |
parent 873 | adefbfa5f42d |
child 1544 | 938876158511 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License, Version 1.0 only |
|
6 |
* (the "License"). You may not use this file except in compliance |
|
7 |
* with the License. |
|
8 |
* |
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 |
* or http://www.opensolaris.org/os/licensing. |
|
11 |
* See the License for the specific language governing permissions |
|
12 |
* and limitations under the License. |
|
13 |
* |
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 |
* If applicable, add the following below this CDDL HEADER, with the |
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 |
* |
|
20 |
* CDDL HEADER END |
|
21 |
*/ |
|
22 |
/* |
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
24 |
* Use is subject to license terms. |
|
25 |
*/ |
|
26 |
||
27 |
#ifndef _SYS_DBUF_H |
|
28 |
#define _SYS_DBUF_H |
|
29 |
||
30 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
31 |
||
32 |
#include <sys/dmu.h> |
|
33 |
#include <sys/spa.h> |
|
34 |
#include <sys/txg.h> |
|
35 |
#include <sys/zio.h> |
|
36 |
#include <sys/arc.h> |
|
37 |
#include <sys/zfs_context.h> |
|
38 |
#include <sys/refcount.h> |
|
39 |
||
40 |
#ifdef __cplusplus |
|
41 |
extern "C" { |
|
42 |
#endif |
|
43 |
||
44 |
/*
 * Sentinel block id: every bit set in an unsigned long long.
 * NOTE(review): presumably identifies the bonus buffer -- confirm in dbuf.c.
 */
#define	DB_BONUS_BLKID	(~0ULL)

/*
 * Sentinel blkptr_t pointer obtained by casting -1; it is not a real
 * block pointer and must never be dereferenced.
 */
#define	IN_DMU_SYNC	((blkptr_t *)(-1))
|
46 |
||
47 |
/* |
|
48 |
* define flags for dbuf_read and friends |
|
49 |
*/ |
|
50 |
||
51 |
/*
 * DB_RF_MUST_SUCCEED is the absence of any flag; the remaining values are
 * distinct single bits and may be OR-ed together.  Bit 0 (0x1) is not
 * assigned to any flag here.
 */
#define	DB_RF_MUST_SUCCEED	0x0
#define	DB_RF_CANFAIL		0x2
#define	DB_RF_HAVESTRUCT	0x4
#define	DB_RF_NOPREFETCH	0x8
|
55 |
||
56 |
/* |
|
57 |
* The state transition diagram for dbufs looks like: |
|
58 |
* |
|
59 |
* +----> READ ----+ |
|
60 |
* | | |
|
61 |
* | V |
|
62 |
* (alloc)-->UNCACHED CACHED-->(free) |
|
63 |
* | ^ |
|
64 |
* | | |
|
65 |
* +----> FILL ----+ |
|
66 |
*/ |
|
67 |
/*
 * Buffer states, numbered explicitly in declaration order.  See the
 * transition diagram above for the legal moves between them.
 */
typedef enum dbuf_states {
	DB_UNCACHED = 0,	/* state entered on (alloc) */
	DB_FILL = 1,		/* between UNCACHED and CACHED, via fill */
	DB_READ = 2,		/* between UNCACHED and CACHED, via read */
	DB_CACHED = 3		/* state from which the buffer is freed */
} dbuf_states_t;
|
73 |
||
74 |
struct objset_impl; |
|
75 |
struct dnode; |
|
76 |
struct dmu_tx; |
|
77 |
||
78 |
/* |
|
79 |
* level = 0 means the user data |
|
80 |
* level = 1 means the single indirect block |
|
81 |
* etc. |
|
82 |
*/ |
|
83 |
||
84 |
/*
 * Evaluates to 1 when both list_next and list_prev of *link are NULL,
 * and to 0 otherwise.  The argument is expanded twice, so it must not
 * have side effects.
 */
#define	LIST_LINK_INACTIVE(link) \
	(!((link)->list_next != NULL || (link)->list_prev != NULL))
|
86 |
||
87 |
/*
 * Implementation-side descriptor for one DMU buffer.  The public
 * dmu_buf_t is embedded as the first member (db).  Members declared
 * before db_mtx are described below as immutable (except db.db_data);
 * members declared after it are protected by db_mtx unless a comment
 * says otherwise.
 */
typedef struct dmu_buf_impl { |

88 |
/* |

89 |
* The following members are immutable, with the exception of |

90 |
* db.db_data, which is protected by db_mtx. |

91 |
*/ |

92 |
||
93 |
/* the publicly visible structure */ |

94 |
dmu_buf_t db; |

95 |
||
96 |
/* the objset we belong to */ |

97 |
struct objset_impl *db_objset; |

98 |
||
99 |
/* |

100 |
* the dnode we belong to (NULL when evicted) |

101 |
*/ |

102 |
struct dnode *db_dnode; |

103 |
||
104 |
/* |

105 |
* our parent buffer; if the dnode points to us directly, |

106 |
* db_parent == db_dnode->dn_dbuf |

107 |
* only accessed by sync thread ??? |

108 |
* (NULL when evicted) |

109 |
*/ |

110 |
struct dmu_buf_impl *db_parent; |

111 |
||
112 |
/* |

113 |
* link for hash table of all dmu_buf_impl_t's |

114 |
*/ |

115 |
struct dmu_buf_impl *db_hash_next; |

116 |
||
117 |
/* our block number */ |

118 |
uint64_t db_blkid; |

119 |
||
120 |
/* |

121 |
* Pointer to the blkptr_t which points to us. May be NULL if we |

122 |
* don't have one yet. (NULL when evicted) |

123 |
*/ |

124 |
blkptr_t *db_blkptr; |

125 |
||
126 |
/* |

127 |
* Our indirection level. Data buffers have db_level==0. |

128 |
* Indirect buffers which point to data buffers have |

129 |
* db_level==1. etc. Buffers which contain dnodes have |

130 |
* db_level==0, since the dnodes are stored in a file. |

131 |
*/ |

132 |
uint8_t db_level; |

133 |
||
134 |
/* db_mtx protects the members below */ |

135 |
kmutex_t db_mtx; |

136 |
||
137 |
/* |

138 |
* Current state of the buffer |

139 |
*/ |

140 |
dbuf_states_t db_state; |

141 |
||
142 |
/* |

143 |
* Refcount accessed by dmu_buf_{hold,rele}. |

144 |
* If nonzero, the buffer can't be destroyed. |

145 |
* Protected by db_mtx. |

146 |
*/ |

147 |
refcount_t db_holds; |

148 |
||
149 |
/* buffer holding our data */ |

150 |
arc_buf_t *db_buf; |

151 |
||
152 |
/* NOTE(review): undocumented; presumably signalled when db_state changes -- confirm in dbuf.c */
kcondvar_t db_changed; |

153 |
/* NOTE(review): undocumented; presumably the data buffer currently being written out -- confirm */
arc_buf_t *db_data_pending; |

154 |
||
155 |
/* |

156 |
* Last time (transaction group) this buffer was dirtied. |

157 |
*/ |

158 |
uint64_t db_dirtied; |

159 |
||
160 |
/* |

161 |
* If db_dnode != NULL, our link on the owner dnode's dn_dbufs list. |

162 |
* Protected by its dn_mtx. |

163 |
*/ |

164 |
list_node_t db_link; |

165 |
||
166 |
/* Our link on dn_dirty_dbufs[txg]; one list node per TXG_SIZE slot */ |

167 |
list_node_t db_dirty_node[TXG_SIZE]; |

168 |
/* NOTE(review): undocumented; presumably the number of txgs in which this buffer is dirty -- confirm */
uint8_t db_dirtycnt; |

169 |
||
170 |
/* |

171 |
* Data which is unique to data (leaf) blocks: |

172 |
*/ |

173 |
struct { |

174 |
/* stuff we store for the user (see dmu_buf_set_user) */ |

175 |
void *db_user_ptr; |

176 |
void **db_user_data_ptr_ptr; |

177 |
dmu_buf_evict_func_t *db_evict_func; |

178 |
uint8_t db_immediate_evict; |

179 |
uint8_t db_freed_in_flight; |

180 |
||
181 |
/* |

182 |
* db_data_old[txg&TXG_MASK] is set when we |

183 |
* dirty the buffer, so that we can retain the |

184 |
* pointer even if it gets COW'd in a subsequent |

185 |
* transaction group. |

186 |
* |

187 |
* If the buffer is dirty in any txg, it can't |

188 |
* be destroyed. |

189 |
*/ |

190 |
/* |

191 |
* XXX Protected by db_mtx and dn_dirty_mtx. |

192 |
* db_mtx must be held to read db_dirty[], and |

193 |
* both db_mtx and dn_dirty_mtx must be held to |

194 |
* modify (dirty or clean). db_mtx must be held |

195 |
* before dn_dirty_mtx. |
*
* NOTE(review): no member named db_dirty[] exists in this
* structure; the text above presumably means db_data_old[]
* -- confirm.

196 |
*/ |

197 |
arc_buf_t *db_data_old[TXG_SIZE]; |

198 |
blkptr_t *db_overridden_by[TXG_SIZE]; |

199 |
} db_d; |

200 |
} dmu_buf_impl_t; |
|
201 |
||
202 |
/* Note: the dbuf hash table is exposed only for the mdb module */ |
|
203 |
#define DBUF_MUTEXES 256 |
|
204 |
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) |
|
205 |
typedef struct dbuf_hash_table { |
|
206 |
uint64_t hash_table_mask; |
|
207 |
dmu_buf_impl_t **hash_table; |
|
208 |
kmutex_t hash_mutexes[DBUF_MUTEXES]; |
|
209 |
} dbuf_hash_table_t; |
|
210 |
||
211 |
||
212 |
/*
 * Prototypes for the dbuf layer.  The implementations are not part of
 * this header, so the notes below only record what the signatures show.
 *
 * NOTE(review): the dnode argument of dbuf_whichblock() is named "di",
 * while every other prototype here names it "dn".
 */
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); |

213 |
||
214 |
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data); |

215 |
||
216 |
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid); |

217 |
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, |

218 |
void *tag); |

219 |
dmu_buf_impl_t *dbuf_hold_bonus(struct dnode *dn, void *tag); |

220 |
/*
 * Unlike the other hold variants, this one returns int and takes a
 * dmu_buf_impl_t ** (presumably the out-parameter that receives the
 * held buffer -- confirm).
 */
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, |

221 |
void *tag, dmu_buf_impl_t **dbp); |

222 |
||
223 |
void dbuf_prefetch(struct dnode *dn, uint64_t blkid); |

224 |
||
225 |
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); |

226 |
void dbuf_remove_ref(dmu_buf_impl_t *db, void *tag); |

227 |
uint64_t dbuf_refcount(dmu_buf_impl_t *db); |

228 |
||
229 |
void dbuf_rele(dmu_buf_impl_t *db); |

230 |
||
231 |
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); |

232 |
||
233 |
/*
 * dbuf_read_canfail() is the only read variant declared with a non-void
 * return.  NOTE(review): the variants presumably correspond to the
 * DB_RF_* flags accepted by dbuf_read_impl() -- confirm.
 */
void dbuf_read(dmu_buf_impl_t *db); |

234 |
int dbuf_read_canfail(dmu_buf_impl_t *db); |

235 |
void dbuf_read_havestruct(dmu_buf_impl_t *db); |

236 |
void dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); |

237 |
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |

238 |
void dbuf_will_fill(dmu_buf_impl_t *db, dmu_tx_t *tx); |

239 |
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx); |

240 |
/* The two dmu_buf_* prototypes take the public dmu_buf_t rather than dmu_buf_impl_t. */
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); |

241 |
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); |

242 |
void dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |

243 |
||
244 |
void dbuf_evict(dmu_buf_impl_t *db); |

245 |
||
246 |
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |

247 |
void dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx); |

248 |
void dbuf_unoverride(dmu_buf_impl_t *db, uint64_t txg); |

249 |
||
250 |
/* The trailing struct dmu_tx * parameter is left unnamed in this prototype. */
void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks, |

251 |
struct dmu_tx *); |

252 |
||
253 |
void dbuf_downgrade(dmu_buf_impl_t *db, int evicting); |

254 |
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); |

255 |
||
256 |
void dbuf_init(void); |

257 |
void dbuf_fini(void); |
|
258 |
||
259 |
#ifdef ZFS_DEBUG |
|
260 |
||
261 |
/* |
|
262 |
* There should be a ## between the string literal and fmt, to make it |
|
896 | 263 |
* clear that we're joining two strings together, but gcc does not |
264 |
* support that preprocessor token. |
|
789 | 265 |
*/ |
266 |
/*
 * Debug print for a dbuf.  When ZFS_DEBUG_DPRINTF is set in zfs_flags,
 * forwards to dprintf_ds() on the dbuf's objset dataset, prefixing the
 * caller's message with "obj=<object> lvl=<level> blkid=<blkid> ".  The
 * object is printed as "mdn" when it equals DMU_META_DNODE_OBJECT and as
 * a decimal number otherwise.
 *
 * fmt must be a string literal (it is concatenated with the prefix), and
 * at least one variadic argument must be supplied because __VA_ARGS__
 * follows a comma.  dbuf is expanded more than once.
 */
#define	dprintf_dbuf(dbuf, fmt, ...) do { \
	if ((zfs_flags & ZFS_DEBUG_DPRINTF) != 0) { \
		const uint64_t __db_objnum = (dbuf)->db.db_object; \
		char __db_objname[32]; \
		if (__db_objnum != DMU_META_DNODE_OBJECT) { \
			(void) snprintf(__db_objname, \
			    sizeof (__db_objname), "%lld", \
			    (u_longlong_t)__db_objnum); \
		} else { \
			(void) strcpy(__db_objname, "mdn"); \
		} \
		dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
		    "obj=%s lvl=%u blkid=%lld " fmt, \
		    __db_objname, (dbuf)->db_level, \
		    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)
|
281 |
||
282 |
/*
 * Like dprintf_dbuf(), but also formats the block pointer bp with
 * sprintf_blkptr() and appends it, followed by a newline, after the
 * caller's message.  Nothing is evaluated unless ZFS_DEBUG_DPRINTF is
 * set in zfs_flags.  fmt must be a string literal and at least one
 * variadic argument must be supplied.
 */
#define	dprintf_dbuf_bp(db, bp, fmt, ...) do { \
	if ((zfs_flags & ZFS_DEBUG_DPRINTF) != 0) { \
		char __bp_text[BP_SPRINTF_LEN]; \
		sprintf_blkptr(__bp_text, BP_SPRINTF_LEN, (bp)); \
		dprintf_dbuf((db), fmt " %s\n", __VA_ARGS__, __bp_text); \
	} \
_NOTE(CONSTCOND) } while (0)
|
289 |
||
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
290 |
#define DBUF_VERIFY(db) dbuf_verify(db) |
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
291 |
|
789 | 292 |
#else |
293 |
||
294 |
/* Non-ZFS_DEBUG build: both dprintf macros have empty bodies, so their arguments are never evaluated. */
#define dprintf_dbuf(db, fmt, ...) |

295 |
#define dprintf_dbuf_bp(db, bp, fmt, ...) |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
296 |
/* Non-ZFS_DEBUG build: DBUF_VERIFY() has an empty body, so no verify call is emitted. */
#define DBUF_VERIFY(db) |
789 | 297 |
|
298 |
#endif |
|
299 |
||
300 |
||
301 |
#ifdef __cplusplus |
|
302 |
} |
|
303 |
#endif |
|
304 |
||
305 |
#endif /* _SYS_DBUF_H */ |