author | johansen |
Tue, 19 Dec 2006 23:13:06 -0800 | |
changeset 3290 | 256464cbb73c |
parent 3093 | 71525e4187d5 |
child 3547 | e396e0a440b1 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1491
bdcb30e07e7d
6389368 fat zap should use 16k blocks (with backwards compatability)
ahrens
parents:
1199
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
bdcb30e07e7d
6389368 fat zap should use 16k blocks (with backwards compatability)
ahrens
parents:
1199
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1199 | 22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/zfs_context.h> |
|
29 |
#include <sys/dmu.h> |
|
30 |
#include <sys/dmu_impl.h> |
|
31 |
#include <sys/dbuf.h> |
|
32 |
#include <sys/dmu_objset.h> |
|
33 |
#include <sys/dsl_dataset.h> |
|
34 |
#include <sys/dsl_dir.h> |
|
35 |
#include <sys/dmu_tx.h> |
|
36 |
#include <sys/spa.h> |
|
37 |
#include <sys/zio.h> |
|
38 |
#include <sys/dmu_zfetch.h> |
|
39 |
||
40 |
static void dbuf_destroy(dmu_buf_impl_t *db); |
|
41 |
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); |
|
42 |
static arc_done_func_t dbuf_write_done; |
|
43 |
||
2986 | 44 |
int zfs_mdcomp_disable = 0; |
45 |
||
789 | 46 |
/* |
47 |
* Global data structures and functions for the dbuf cache. |
|
48 |
*/ |
|
49 |
taskq_t *dbuf_tq; |
|
50 |
static kmem_cache_t *dbuf_cache; |
|
51 |
||
52 |
/* ARGSUSED */ |
|
53 |
static int |
|
54 |
dbuf_cons(void *vdb, void *unused, int kmflag) |
|
55 |
{ |
|
56 |
dmu_buf_impl_t *db = vdb; |
|
57 |
bzero(db, sizeof (dmu_buf_impl_t)); |
|
58 |
||
59 |
mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); |
|
60 |
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); |
|
61 |
refcount_create(&db->db_holds); |
|
62 |
return (0); |
|
63 |
} |
|
64 |
||
65 |
/* ARGSUSED */ |
|
66 |
static void |
|
67 |
dbuf_dest(void *vdb, void *unused) |
|
68 |
{ |
|
69 |
dmu_buf_impl_t *db = vdb; |
|
70 |
mutex_destroy(&db->db_mtx); |
|
71 |
cv_destroy(&db->db_changed); |
|
72 |
refcount_destroy(&db->db_holds); |
|
73 |
} |
|
74 |
||
75 |
/*
 * dbuf hash table routines
 */
static dbuf_hash_table_t dbuf_hash_table;

/* Number of dbufs currently in the hash table (updated atomically). */
static uint64_t dbuf_hash_count;

/*
 * Hash an (objset, object, level, blkid) tuple to a 64-bit value using
 * the shared ZFS CRC-64 table.  Only the low-order bytes of each input
 * are folded through the CRC; the remaining high bits are XORed in at
 * the end so large objects/blkids still perturb the result.
 */
static uint64_t
dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
{
	uintptr_t osv = (uintptr_t)os;
	uint64_t crc = -1ULL;

	/* Sanity-check that the CRC table has been initialized. */
	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
	/* osv >> 6 skips the low pointer bits, which carry little entropy. */
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF];
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 0)) & 0xFF];
	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF];

	/* Fold in the bits not covered by the CRC passes above. */
	crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16);

	return (crc);
}
|
100 |
||
101 |
/*
 * Hash a dbuf identity tuple into the table.  NOTE: no trailing
 * semicolon -- a function-like macro must expand to an expression
 * (CERT PRE11-C); the previous stray ';' broke any use of DBUF_HASH
 * inside a larger expression or a C89-style declaration list.
 */
#define	DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid)

/* True iff dbuf matches the given (objset, object, level, blkid) tuple. */
#define	DBUF_EQUAL(dbuf, os, obj, level, blkid)		\
	((dbuf)->db.db_object == (obj) &&		\
	(dbuf)->db_objset == (os) &&			\
	(dbuf)->db_level == (level) &&			\
	(dbuf)->db_blkid == (blkid))
|
108 |
||
109 |
/*
 * Look up a dbuf in the hash table by (dnode, level, blkid).
 *
 * On a hit, returns the dbuf with its db_mtx HELD (caller must drop it);
 * buffers in DB_EVICTING state are treated as absent.  Returns NULL on
 * a miss.  Lock order: hash chain mutex, then db_mtx.
 */
dmu_buf_impl_t *
dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
{
	dbuf_hash_table_t *h = &dbuf_hash_table;
	objset_impl_t *os = dn->dn_objset;
	uint64_t obj = dn->dn_object;
	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
	uint64_t idx = hv & h->hash_table_mask;
	dmu_buf_impl_t *db;

	mutex_enter(DBUF_HASH_MUTEX(h, idx));
	for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
		if (DBUF_EQUAL(db, os, obj, level, blkid)) {
			mutex_enter(&db->db_mtx);
			if (db->db_state != DB_EVICTING) {
				/* hit: return with db_mtx held */
				mutex_exit(DBUF_HASH_MUTEX(h, idx));
				return (db);
			}
			/* mid-eviction; keep scanning the chain */
			mutex_exit(&db->db_mtx);
		}
	}
	mutex_exit(DBUF_HASH_MUTEX(h, idx));
	return (NULL);
}
|
133 |
||
134 |
/*
 * Insert an entry into the hash table.  If there is already an element
 * equal to elem in the hash table, then the already existing element
 * will be returned and the new element will not be inserted.
 * Otherwise returns NULL.
 *
 * NOTE(review): on the insert path this acquires db->db_mtx and returns
 * without releasing it -- presumably the caller expects to receive the
 * new dbuf locked; confirm against callers.  On the "already exists"
 * path, the EXISTING dbuf is returned with its db_mtx held instead.
 */
static dmu_buf_impl_t *
dbuf_hash_insert(dmu_buf_impl_t *db)
{
	dbuf_hash_table_t *h = &dbuf_hash_table;
	objset_impl_t *os = db->db_objset;
	uint64_t obj = db->db.db_object;
	int level = db->db_level;
	uint64_t blkid = db->db_blkid;
	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
	uint64_t idx = hv & h->hash_table_mask;
	dmu_buf_impl_t *dbf;

	mutex_enter(DBUF_HASH_MUTEX(h, idx));
	for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
		if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
			mutex_enter(&dbf->db_mtx);
			if (dbf->db_state != DB_EVICTING) {
				/* lost the race: hand back the winner */
				mutex_exit(DBUF_HASH_MUTEX(h, idx));
				return (dbf);
			}
			mutex_exit(&dbf->db_mtx);
		}
	}

	/* No equal element: link db at the head of its hash chain. */
	mutex_enter(&db->db_mtx);
	db->db_hash_next = h->hash_table[idx];
	h->hash_table[idx] = db;
	mutex_exit(DBUF_HASH_MUTEX(h, idx));
	atomic_add_64(&dbuf_hash_count, 1);

	return (NULL);
}
|
172 |
||
173 |
/*
 * Remove an entry from the hash table.  This operation will
 * fail if there are any existing holds on the db.
 */
static void
dbuf_hash_remove(dmu_buf_impl_t *db)
{
	dbuf_hash_table_t *h = &dbuf_hash_table;
	uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object,
	    db->db_level, db->db_blkid);
	uint64_t idx = hv & h->hash_table_mask;
	dmu_buf_impl_t *dbf, **dbp;

	/*
	 * We mustn't hold db_mtx to maintain lock ordering:
	 * DBUF_HASH_MUTEX > db_mtx.
	 */
	ASSERT(refcount_is_zero(&db->db_holds));
	ASSERT(db->db_state == DB_EVICTING);
	ASSERT(!MUTEX_HELD(&db->db_mtx));

	/*
	 * Walk the chain via pointer-to-pointer so unlinking the entry
	 * is a single store, with no special case for the chain head.
	 */
	mutex_enter(DBUF_HASH_MUTEX(h, idx));
	dbp = &h->hash_table[idx];
	while ((dbf = *dbp) != db) {
		dbp = &dbf->db_hash_next;
		ASSERT(dbf != NULL);	/* db must be on this chain */
	}
	*dbp = db->db_hash_next;
	db->db_hash_next = NULL;
	mutex_exit(DBUF_HASH_MUTEX(h, idx));
	atomic_add_64(&dbuf_hash_count, -1);
}
|
205 |
||
1544 | 206 |
static arc_evict_func_t dbuf_do_evict;

/*
 * Run the user's registered eviction callback (if any) for a level-0
 * dbuf and clear all user state.  Caller must hold db_mtx.  The cached
 * data pointer is pushed out to the user's data_ptr_ptr first so the
 * callback sees the final value.
 */
static void
dbuf_evict_user(dmu_buf_impl_t *db)
{
	ASSERT(MUTEX_HELD(&db->db_mtx));

	/* User callbacks only exist on level-0 buffers. */
	if (db->db_level != 0 || db->db_d.db_evict_func == NULL)
		return;

	if (db->db_d.db_user_data_ptr_ptr)
		*db->db_d.db_user_data_ptr_ptr = db->db.db_data;
	db->db_d.db_evict_func(&db->db, db->db_d.db_user_ptr);
	/* Callback fires at most once: drop all user references. */
	db->db_d.db_user_ptr = NULL;
	db->db_d.db_user_data_ptr_ptr = NULL;
	db->db_d.db_evict_func = NULL;
}
|
223 |
||
224 |
void |
|
1544 | 225 |
dbuf_evict(dmu_buf_impl_t *db) |
226 |
{ |
|
227 |
int i; |
|
228 |
||
229 |
ASSERT(MUTEX_HELD(&db->db_mtx)); |
|
230 |
ASSERT(db->db_buf == NULL); |
|
231 |
||
232 |
#ifdef ZFS_DEBUG |
|
233 |
for (i = 0; i < TXG_SIZE; i++) { |
|
234 |
ASSERT(!list_link_active(&db->db_dirty_node[i])); |
|
235 |
ASSERT(db->db_level != 0 || db->db_d.db_data_old[i] == NULL); |
|
236 |
} |
|
237 |
#endif |
|
238 |
dbuf_clear(db); |
|
239 |
dbuf_destroy(db); |
|
240 |
} |
|
241 |
||
242 |
/*
 * One-time initialization of the dbuf subsystem: size and allocate the
 * global dbuf hash table, create the dbuf kmem cache and taskq, and
 * initialize the hash-chain mutexes.
 */
void
dbuf_init(void)
{
	uint64_t hsize = 1ULL << 16;
	dbuf_hash_table_t *h = &dbuf_hash_table;
	int i;

	/*
	 * The hash table is big enough to fill all of physical memory
	 * with an average 4K block size.  The table will take up
	 * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers).
	 */
	while (hsize * 4096 < physmem * PAGESIZE)
		hsize <<= 1;

retry:
	h->hash_table_mask = hsize - 1;
	/* KM_NOSLEEP so we can halve the request and retry on failure. */
	h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
	if (h->hash_table == NULL) {
		/* XXX - we should really return an error instead of assert */
		ASSERT(hsize > (1ULL << 10));
		hsize >>= 1;
		goto retry;
	}

	dbuf_cache = kmem_cache_create("dmu_buf_impl_t",
	    sizeof (dmu_buf_impl_t),
	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
	dbuf_tq = taskq_create("dbuf_tq", 8, maxclsyspri, 50, INT_MAX,
	    TASKQ_PREPOPULATE);

	for (i = 0; i < DBUF_MUTEXES; i++)
		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
}
|
276 |
||
277 |
void |
|
278 |
dbuf_fini(void) |
|
279 |
{ |
|
280 |
dbuf_hash_table_t *h = &dbuf_hash_table; |
|
281 |
int i; |
|
282 |
||
283 |
taskq_destroy(dbuf_tq); |
|
284 |
dbuf_tq = NULL; |
|
285 |
||
286 |
for (i = 0; i < DBUF_MUTEXES; i++) |
|
287 |
mutex_destroy(&h->hash_mutexes[i]); |
|
288 |
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); |
|
289 |
kmem_cache_destroy(dbuf_cache); |
|
290 |
} |
|
291 |
||
292 |
/* |
|
293 |
* Other stuff. |
|
294 |
*/ |
|
295 |
||
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
296 |
#ifdef ZFS_DEBUG |
789 | 297 |
static void |
298 |
dbuf_verify(dmu_buf_impl_t *db) |
|
299 |
{ |
|
300 |
int i; |
|
301 |
dnode_t *dn = db->db_dnode; |
|
302 |
||
303 |
ASSERT(MUTEX_HELD(&db->db_mtx)); |
|
304 |
||
305 |
if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY)) |
|
306 |
return; |
|
307 |
||
308 |
ASSERT(db->db_objset != NULL); |
|
309 |
if (dn == NULL) { |
|
310 |
ASSERT(db->db_parent == NULL); |
|
311 |
ASSERT(db->db_blkptr == NULL); |
|
312 |
} else { |
|
313 |
ASSERT3U(db->db.db_object, ==, dn->dn_object); |
|
314 |
ASSERT3P(db->db_objset, ==, dn->dn_objset); |
|
315 |
ASSERT3U(db->db_level, <, dn->dn_nlevels); |
|
1544 | 316 |
ASSERT(db->db_blkid == DB_BONUS_BLKID || |
317 |
list_head(&dn->dn_dbufs)); |
|
789 | 318 |
} |
319 |
if (db->db_blkid == DB_BONUS_BLKID) { |
|
320 |
ASSERT(dn != NULL); |
|
321 |
ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen); |
|
322 |
ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID); |
|
323 |
} else { |
|
324 |
ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); |
|
325 |
} |
|
326 |
||
327 |
if (db->db_level == 0) { |
|
328 |
/* we can be momentarily larger in dnode_set_blksz() */ |
|
329 |
if (db->db_blkid != DB_BONUS_BLKID && dn) { |
|
330 |
ASSERT3U(db->db.db_size, >=, dn->dn_datablksz); |
|
331 |
} |
|
1544 | 332 |
if (db->db.db_object == DMU_META_DNODE_OBJECT) { |
789 | 333 |
for (i = 0; i < TXG_SIZE; i++) { |
334 |
/* |
|
335 |
* it should only be modified in syncing |
|
336 |
* context, so make sure we only have |
|
337 |
* one copy of the data. |
|
338 |
*/ |
|
339 |
ASSERT(db->db_d.db_data_old[i] == NULL || |
|
340 |
db->db_d.db_data_old[i] == db->db_buf); |
|
341 |
} |
|
342 |
} |
|
343 |
} |
|
344 |
||
345 |
/* verify db->db_blkptr */ |
|
346 |
if (db->db_blkptr) { |
|
347 |
if (db->db_parent == dn->dn_dbuf) { |
|
348 |
/* db is pointed to by the dnode */ |
|
349 |
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ |
|
1544 | 350 |
if (db->db.db_object == DMU_META_DNODE_OBJECT) |
789 | 351 |
ASSERT(db->db_parent == NULL); |
352 |
else |
|
353 |
ASSERT(db->db_parent != NULL); |
|
354 |
ASSERT3P(db->db_blkptr, ==, |
|
355 |
&dn->dn_phys->dn_blkptr[db->db_blkid]); |
|
356 |
} else { |
|
357 |
/* db is pointed to by an indirect block */ |
|
358 |
int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT; |
|
359 |
ASSERT3U(db->db_parent->db_level, ==, db->db_level+1); |
|
360 |
ASSERT3U(db->db_parent->db.db_object, ==, |
|
361 |
db->db.db_object); |
|
362 |
/* |
|
363 |
* dnode_grow_indblksz() can make this fail if we don't |
|
364 |
* have the struct_rwlock. XXX indblksz no longer |
|
365 |
* grows. safe to do this now? |
|
366 |
*/ |
|
367 |
if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock)) { |
|
368 |
ASSERT3P(db->db_blkptr, ==, |
|
369 |
((blkptr_t *)db->db_parent->db.db_data + |
|
370 |
db->db_blkid % epb)); |
|
371 |
} |
|
372 |
} |
|
373 |
} |
|
374 |
if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) && |
|
375 |
db->db.db_data && db->db_blkid != DB_BONUS_BLKID && |
|
376 |
db->db_state != DB_FILL && !dn->dn_free_txg) { |
|
377 |
/* |
|
378 |
* If the blkptr isn't set but they have nonzero data, |
|
379 |
* it had better be dirty, otherwise we'll lose that |
|
380 |
* data when we evict this buffer. |
|
381 |
*/ |
|
382 |
if (db->db_dirtycnt == 0) { |
|
383 |
uint64_t *buf = db->db.db_data; |
|
384 |
int i; |
|
385 |
||
386 |
for (i = 0; i < db->db.db_size >> 3; i++) { |
|
387 |
ASSERT(buf[i] == 0); |
|
388 |
} |
|
389 |
} |
|
390 |
} |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
391 |
} |
789 | 392 |
#endif |
393 |
||
394 |
static void |
|
395 |
dbuf_update_data(dmu_buf_impl_t *db) |
|
396 |
{ |
|
397 |
ASSERT(MUTEX_HELD(&db->db_mtx)); |
|
398 |
if (db->db_level == 0 && db->db_d.db_user_data_ptr_ptr) { |
|
399 |
ASSERT(!refcount_is_zero(&db->db_holds)); |
|
400 |
*db->db_d.db_user_data_ptr_ptr = db->db.db_data; |
|
401 |
} |
|
402 |
} |
|
403 |
||
404 |
/*
 * Attach an ARC buffer to the dbuf (or detach, if buf is NULL).
 * With a buffer: publish its data pointer, register the eviction
 * callback on unreleased buffers, and refresh the user data pointer.
 * With NULL: run the user eviction callback and mark DB_UNCACHED.
 * Caller must hold db_mtx.
 */
static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
	db->db_buf = buf;
	if (buf != NULL) {
		ASSERT(buf->b_data != NULL);
		db->db.db_data = buf->b_data;
		/* released buffers are private; no eviction callback */
		if (!arc_released(buf))
			arc_set_callback(buf, dbuf_do_evict, db);
		dbuf_update_data(db);
	} else {
		dbuf_evict_user(db);
		db->db.db_data = NULL;
		db->db_state = DB_UNCACHED;
	}
}
422 |
||
423 |
uint64_t |
|
424 |
dbuf_whichblock(dnode_t *dn, uint64_t offset) |
|
425 |
{ |
|
426 |
if (dn->dn_datablkshift) { |
|
427 |
return (offset >> dn->dn_datablkshift); |
|
428 |
} else { |
|
429 |
ASSERT3U(offset, <, dn->dn_datablksz); |
|
430 |
return (0); |
|
431 |
} |
|
432 |
} |
|
433 |
||
434 |
/*
 * ARC read completion callback for dbuf_read_impl().  Attaches the
 * filled ARC buffer to the dbuf on success; discards it on I/O error
 * or if the block was freed while the read was in flight.  Drops the
 * hold taken by dbuf_read_impl() before the read was issued.
 */
static void
dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
	dmu_buf_impl_t *db = vdb;

	mutex_enter(&db->db_mtx);
	ASSERT3U(db->db_state, ==, DB_READ);
	/*
	 * All reads are synchronous, so we must have a hold on the dbuf
	 */
	ASSERT(refcount_count(&db->db_holds) > 0);
	ASSERT(db->db_buf == NULL);
	ASSERT(db->db.db_data == NULL);
	if (db->db_level == 0 && db->db_d.db_freed_in_flight) {
		/* we were freed in flight; disregard any error */
		arc_release(buf, db);
		bzero(buf->b_data, db->db.db_size);
		arc_buf_freeze(buf);
		db->db_d.db_freed_in_flight = FALSE;
		dbuf_set_data(db, buf);
		db->db_state = DB_CACHED;
	} else if (zio == NULL || zio->io_error == 0) {
		/* successful read: cache the buffer */
		dbuf_set_data(db, buf);
		db->db_state = DB_CACHED;
	} else {
		/* I/O error: throw the buffer away, back to UNCACHED */
		ASSERT(db->db_blkid != DB_BONUS_BLKID);
		ASSERT3P(db->db_buf, ==, NULL);
		VERIFY(arc_buf_remove_ref(buf, db) == 1);
		db->db_state = DB_UNCACHED;
	}
	/* wake anyone blocked in dbuf_read()/dbuf_noread() */
	cv_broadcast(&db->db_changed);
	mutex_exit(&db->db_mtx);
	dbuf_rele(db, NULL);
}
468 |
||
1544 | 469 |
/*
 * Start filling an UNCACHED dbuf.  Bonus buffers are copied straight
 * from the dnode phys; holes are satisfied with a zeroed ARC buffer;
 * everything else issues an (asynchronous) arc_read that completes in
 * dbuf_read_done().  Entered with db_mtx held; db_mtx is dropped on
 * ALL paths before returning.  *flags gets DB_RF_CACHED set when the
 * data was satisfied without a disk read.
 */
static void
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
{
	blkptr_t *bp;
	zbookmark_t zb;
	uint32_t aflags = ARC_NOWAIT;

	ASSERT(!refcount_is_zero(&db->db_holds));
	/* We need the struct_rwlock to prevent db_blkptr from changing. */
	ASSERT(RW_LOCK_HELD(&db->db_dnode->dn_struct_rwlock));
	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db_state == DB_UNCACHED);
	ASSERT(db->db_buf == NULL);

	if (db->db_blkid == DB_BONUS_BLKID) {
		/* bonus buffer: copy from the dnode, no I/O needed */
		ASSERT3U(db->db_dnode->dn_bonuslen, ==, db->db.db_size);
		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
		/* zero the tail beyond the actual bonus length */
		if (db->db.db_size < DN_MAX_BONUSLEN)
			bzero(db->db.db_data, DN_MAX_BONUSLEN);
		bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data,
		    db->db.db_size);
		dbuf_update_data(db);
		db->db_state = DB_CACHED;
		mutex_exit(&db->db_mtx);
		return;
	}

	/* a block freed in this txg reads as a hole */
	if (db->db_level == 0 && dnode_block_freed(db->db_dnode, db->db_blkid))
		bp = NULL;
	else
		bp = db->db_blkptr;

	if (bp == NULL)
		dprintf_dbuf(db, "blkptr: %s\n", "NULL");
	else
		dprintf_dbuf_bp(db, bp, "%s", "blkptr:");

	if (bp == NULL || BP_IS_HOLE(bp)) {
		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);

		/* hole: hand back a zero-filled buffer, no disk read */
		ASSERT(bp == NULL || BP_IS_HOLE(bp));
		dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
		    db->db.db_size, db, type));
		bzero(db->db.db_data, db->db.db_size);
		db->db_state = DB_CACHED;
		*flags |= DB_RF_CACHED;
		mutex_exit(&db->db_mtx);
		return;
	}

	db->db_state = DB_READ;
	mutex_exit(&db->db_mtx);

	zb.zb_objset = db->db_objset->os_dsl_dataset ?
	    db->db_objset->os_dsl_dataset->ds_object : 0;
	zb.zb_object = db->db.db_object;
	zb.zb_level = db->db_level;
	zb.zb_blkid = db->db_blkid;

	/* hold for the in-flight read; dropped in dbuf_read_done() */
	dbuf_add_ref(db, NULL);
	/* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
	(void) arc_read(zio, db->db_dnode->dn_objset->os_spa, bp,
	    db->db_level > 0 ? byteswap_uint64_array :
	    dmu_ot[db->db_dnode->dn_type].ot_byteswap,
	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
	    (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
	    &aflags, &zb);
	if (aflags & ARC_CACHED)
		*flags |= DB_RF_CACHED;
}
539 |
||
1544 | 540 |
/*
 * Ensure the dbuf's contents are available, dispatching on its state:
 *   DB_CACHED   - data already present; just prefetch.
 *   DB_UNCACHED - issue the read via dbuf_read_impl() (under a root zio
 *                 if the caller supplied none, in which case we wait).
 *   DB_READ/DB_FILL - another thread is filling it; wait on db_changed
 *                 unless DB_RF_NEVERWAIT is set.
 * Returns 0 or an error (EIO if a waited-for read failed).  When the
 * caller passes its own zio (havepzio), errors surface on that zio
 * instead.  Takes dn_struct_rwlock as reader unless DB_RF_HAVESTRUCT.
 */
int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
	int err = 0;
	int havepzio = (zio != NULL);
	int prefetch;

	/*
	 * We don't have to hold the mutex to check db_state because it
	 * can't be freed while we have a hold on the buffer.
	 */
	ASSERT(!refcount_is_zero(&db->db_holds));

	if ((flags & DB_RF_HAVESTRUCT) == 0)
		rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);

	/* prefetch only applies to real (non-bonus) level-0 blocks */
	prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
	    (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL;

	mutex_enter(&db->db_mtx);
	if (db->db_state == DB_CACHED) {
		mutex_exit(&db->db_mtx);
		if (prefetch)
			dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
			    db->db.db_size, TRUE);
		if ((flags & DB_RF_HAVESTRUCT) == 0)
			rw_exit(&db->db_dnode->dn_struct_rwlock);
	} else if (db->db_state == DB_UNCACHED) {
		if (zio == NULL) {
			/* no parent zio: make one so we can wait below */
			zio = zio_root(db->db_dnode->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);
		}
		dbuf_read_impl(db, zio, &flags);

		/* dbuf_read_impl has dropped db_mtx for us */

		if (prefetch)
			dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
			    db->db.db_size, flags & DB_RF_CACHED);

		if ((flags & DB_RF_HAVESTRUCT) == 0)
			rw_exit(&db->db_dnode->dn_struct_rwlock);

		if (!havepzio)
			err = zio_wait(zio);
	} else {
		/* DB_READ or DB_FILL: someone else is filling the buffer */
		mutex_exit(&db->db_mtx);
		if (prefetch)
			dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
			    db->db.db_size, TRUE);
		if ((flags & DB_RF_HAVESTRUCT) == 0)
			rw_exit(&db->db_dnode->dn_struct_rwlock);

		mutex_enter(&db->db_mtx);
		if ((flags & DB_RF_NEVERWAIT) == 0) {
			while (db->db_state == DB_READ ||
			    db->db_state == DB_FILL) {
				ASSERT(db->db_state == DB_READ ||
				    (flags & DB_RF_HAVESTRUCT) == 0);
				cv_wait(&db->db_changed, &db->db_mtx);
			}
			/* read failed and the buffer was discarded */
			if (db->db_state == DB_UNCACHED)
				err = EIO;
		}
		mutex_exit(&db->db_mtx);
	}

	ASSERT(err || havepzio || db->db_state == DB_CACHED);
	return (err);
}
610 |
||
611 |
static void |
|
612 |
dbuf_noread(dmu_buf_impl_t *db) |
|
613 |
{ |
|
614 |
ASSERT(!refcount_is_zero(&db->db_holds)); |
|
1544 | 615 |
ASSERT(db->db_blkid != DB_BONUS_BLKID); |
789 | 616 |
mutex_enter(&db->db_mtx); |
617 |
while (db->db_state == DB_READ || db->db_state == DB_FILL) |
|
618 |
cv_wait(&db->db_changed, &db->db_mtx); |
|
619 |
if (db->db_state == DB_UNCACHED) { |
|
3290 | 620 |
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); |
621 |
||
1544 | 622 |
ASSERT(db->db_buf == NULL); |
789 | 623 |
ASSERT(db->db.db_data == NULL); |
624 |
dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa, |
|
3290 | 625 |
db->db.db_size, db, type)); |
789 | 626 |
db->db_state = DB_FILL; |
627 |
} else { |
|
628 |
ASSERT3U(db->db_state, ==, DB_CACHED); |
|
629 |
} |
|
630 |
mutex_exit(&db->db_mtx); |
|
631 |
} |
|
632 |
||
633 |
/*
 * This is our just-in-time copy function.  It makes a copy of
 * buffers, that have been modified in a previous transaction
 * group, before we modify them in the current active group.
 *
 * This function is used in two places: when we are dirtying a
 * buffer for the first time in a txg, and when we are freeing
 * a range in a dnode that includes this buffer.
 *
 * Note that when we are called from dbuf_free_range() we do
 * not put a hold on the buffer, we just traverse the active
 * dbuf list for the dnode.
 */
static void
dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
{
	arc_buf_t **quiescing, **syncing;
	arc_buf_contents_t type;

	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db.db_data != NULL);
	ASSERT(db->db_blkid != DB_BONUS_BLKID);

	/* txg-1 is the quiescing txg, txg-2 is the syncing txg */
	quiescing = &db->db_d.db_data_old[(txg-1)&TXG_MASK];
	syncing = &db->db_d.db_data_old[(txg-2)&TXG_MASK];

	/*
	 * If this buffer is referenced from the current quiescing
	 * transaction group: either make a copy and reset the reference
	 * to point to the copy, or (if there are no active holders) just
	 * null out the current db_data pointer.
	 */
	if (*quiescing == db->db_buf) {
		/*
		 * If the quiescing txg is "dirty", then we better not
		 * be referencing the same buffer from the syncing txg.
		 */
		ASSERT(*syncing != db->db_buf);
		if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
			/* active holders: must copy the old contents */
			int size = db->db.db_size;
			type = DBUF_GET_BUFC_TYPE(db);
			*quiescing = arc_buf_alloc(
			    db->db_dnode->dn_objset->os_spa, size, db, type);
			bcopy(db->db.db_data, (*quiescing)->b_data, size);
		} else {
			/* no other holders: just hand the buffer over */
			dbuf_set_data(db, NULL);
		}
		return;
	}

	/*
	 * If this buffer is referenced from the current syncing
	 * transaction group: either
	 *	1 - make a copy and reset the reference, or
	 *	2 - if there are no holders, just null the current db_data.
	 */
	if (*syncing == db->db_buf) {
		ASSERT3P(*quiescing, ==, NULL);
		ASSERT3U(db->db_dirtycnt, ==, 1);
		if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
			int size = db->db.db_size;
			type = DBUF_GET_BUFC_TYPE(db);
			/* we can't copy if we have already started a write */
			ASSERT(*syncing != db->db_data_pending);
			*syncing = arc_buf_alloc(
			    db->db_dnode->dn_objset->os_spa, size, db, type);
			bcopy(db->db.db_data, (*syncing)->b_data, size);
		} else {
			dbuf_set_data(db, NULL);
		}
	}
}
|
705 |
||
1544 | 706 |
/*
 * This is the "bonus buffer" version of the above routine.
 * Bonus data lives in a plain zio buffer (not an ARC buffer), so the
 * old contents are always copied into a fresh DN_MAX_BONUSLEN buffer.
 */
static void
dbuf_fix_old_bonus_data(dmu_buf_impl_t *db, uint64_t txg)
{
	arc_buf_t **quiescing, **syncing;

	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db.db_data != NULL);
	ASSERT(db->db_blkid == DB_BONUS_BLKID);

	/* txg-1 is the quiescing txg, txg-2 is the syncing txg */
	quiescing = &db->db_d.db_data_old[(txg-1)&TXG_MASK];
	syncing = &db->db_d.db_data_old[(txg-2)&TXG_MASK];

	if (*quiescing == db->db.db_data) {
		ASSERT(*syncing != db->db.db_data);
		*quiescing = zio_buf_alloc(DN_MAX_BONUSLEN);
		bcopy(db->db.db_data, *quiescing, DN_MAX_BONUSLEN);
	} else if (*syncing == db->db.db_data) {
		ASSERT3P(*quiescing, ==, NULL);
		ASSERT3U(db->db_dirtycnt, ==, 1);
		*syncing = zio_buf_alloc(DN_MAX_BONUSLEN);
		bcopy(db->db.db_data, *syncing, DN_MAX_BONUSLEN);
	}
}
|
732 |
||
789 | 733 |
/*
 * Undo a dmu_sync() override for the given txg: free the block the
 * override pointer named (unless it is a hole), discard the override
 * blkptr, and release the already-written ARC buffer so the dbuf is
 * back in a consistent dirty state.  Caller must hold db_mtx; the
 * override must not be mid-dmu_sync (IN_DMU_SYNC).
 */
void
dbuf_unoverride(dmu_buf_impl_t *db, uint64_t txg)
{
	ASSERT(db->db_blkid != DB_BONUS_BLKID);
	ASSERT(MUTEX_HELD(&db->db_mtx));
	ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] != IN_DMU_SYNC);

	if (db->db_d.db_overridden_by[txg&TXG_MASK] != NULL) {
		/* free this block */
		ASSERT(list_link_active(&db->db_dirty_node[txg&TXG_MASK]) ||
		    db->db_dnode->dn_free_txg == txg);
		if (!BP_IS_HOLE(db->db_d.db_overridden_by[txg&TXG_MASK])) {
			/* XXX can get silent EIO here */
			(void) arc_free(NULL, db->db_dnode->dn_objset->os_spa,
			    txg, db->db_d.db_overridden_by[txg&TXG_MASK],
			    NULL, NULL, ARC_WAIT);
		}
		kmem_free(db->db_d.db_overridden_by[txg&TXG_MASK],
		    sizeof (blkptr_t));
		db->db_d.db_overridden_by[txg&TXG_MASK] = NULL;
		/*
		 * Release the already-written buffer, so we leave it in
		 * a consistent dirty state.  Note that all callers are
		 * modifying the buffer, so they will immediately do
		 * another (redundant) arc_release().  Therefore, leave
		 * the buf thawed to save the effort of freezing &
		 * immediately re-thawing it.
		 */
		arc_release(db->db_d.db_data_old[txg&TXG_MASK], db);
	}
}
|
764 |
||
765 |
/*
 * Handle the freeing of block range [blkid, blkid+nblks) for this dnode:
 * walk the dnode's dbuf list and undirty, evict, or zero out every level-0
 * dbuf that falls inside the range.  Called with tx identifying the txg in
 * which the free takes effect.
 */
void
dbuf_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db, *db_next;
	uint64_t txg = tx->tx_txg;

	dprintf_dnode(dn, "blkid=%llu nblks=%llu\n", blkid, nblks);
	mutex_enter(&dn->dn_dbufs_mtx);
	for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
		/* grab the next link first: db may be cleared below */
		db_next = list_next(&dn->dn_dbufs, db);
		ASSERT(db->db_blkid != DB_BONUS_BLKID);
		/* only level-0 (data) blocks are freed here */
		if (db->db_level != 0)
			continue;
		dprintf_dbuf(db, "found buf %s\n", "");
		if (db->db_blkid < blkid ||
		    db->db_blkid >= blkid+nblks)
			continue;

		/* found a level 0 buffer in the range */
		if (dbuf_undirty(db, tx))
			continue;

		mutex_enter(&db->db_mtx);
		if (db->db_state == DB_UNCACHED ||
		    db->db_state == DB_EVICTING) {
			/* nothing cached to discard */
			ASSERT(db->db.db_data == NULL);
			mutex_exit(&db->db_mtx);
			continue;
		}
		if (db->db_state == DB_READ || db->db_state == DB_FILL) {
			/* will be handled in dbuf_read_done or dbuf_rele */
			db->db_d.db_freed_in_flight = TRUE;
			mutex_exit(&db->db_mtx);
			continue;
		}
		if (refcount_count(&db->db_holds) == 0) {
			/* unreferenced and cached: just evict it */
			ASSERT(db->db_buf);
			dbuf_clear(db);
			continue;
		}
		/* The dbuf is CACHED and referenced */

		if (!list_link_active(&db->db_dirty_node[txg & TXG_MASK])) {
			/*
			 * This dbuf is not currently dirty. Either
			 * uncache it (if its not referenced in the open
			 * context) or reset its contents to empty.
			 */
			dbuf_fix_old_data(db, txg);
		} else {
			if (db->db_d.db_overridden_by[txg & TXG_MASK] != NULL) {
				/*
				 * This dbuf is overridden. Clear that state.
				 */
				dbuf_unoverride(db, txg);
			}
			/* freed+redirtied block can extend the file's size */
			if (db->db_blkid > dn->dn_maxblkid)
				dn->dn_maxblkid = db->db_blkid;
		}
		/* fill in with appropriate data */
		if (db->db_state == DB_CACHED) {
			ASSERT(db->db.db_data != NULL);
			arc_release(db->db_buf, db);
			bzero(db->db.db_data, db->db.db_size);
			/* re-freeze: contents must not change after this */
			arc_buf_freeze(db->db_buf);
		}

		mutex_exit(&db->db_mtx);
	}
	mutex_exit(&dn->dn_dbufs_mtx);
}
|
836 |
||
837 |
static int |
|
1544 | 838 |
dbuf_new_block(dmu_buf_impl_t *db) |
789 | 839 |
{ |
840 |
dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; |
|
841 |
uint64_t birth_txg = 0; |
|
842 |
||
843 |
/* Don't count meta-objects */ |
|
844 |
if (ds == NULL) |
|
845 |
return (FALSE); |
|
846 |
||
847 |
/* |
|
848 |
* We don't need any locking to protect db_blkptr: |
|
849 |
* If it's syncing, then db_dirtied will be set so we'll |
|
850 |
* ignore db_blkptr. |
|
851 |
*/ |
|
852 |
ASSERT(MUTEX_HELD(&db->db_mtx)); /* XXX strictly necessary? */ |
|
853 |
/* If we have been dirtied since the last snapshot, its not new */ |
|
854 |
if (db->db_dirtied) |
|
855 |
birth_txg = db->db_dirtied; |
|
856 |
else if (db->db_blkptr) |
|
857 |
birth_txg = db->db_blkptr->blk_birth; |
|
858 |
||
859 |
if (birth_txg) |
|
1544 | 860 |
return (!dsl_dataset_block_freeable(ds, birth_txg)); |
789 | 861 |
else |
862 |
return (TRUE); |
|
863 |
} |
|
864 |
||
865 |
/*
 * Resize this dbuf to 'size' bytes: dirty it, allocate a new ARC buffer,
 * copy over the overlapping data, zero any growth, and charge the space
 * delta to the dnode.  Caller must hold dn_struct_rwlock as writer.
 */
void
dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
{
	arc_buf_t *buf, *obuf;
	int osize = db->db.db_size;
	arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);

	ASSERT(db->db_blkid != DB_BONUS_BLKID);

	/* XXX does *this* func really need the lock? */
	ASSERT(RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock));

	/*
	 * This call to dbuf_will_dirty() with the dn_struct_rwlock held
	 * is OK, because there can be no other references to the db
	 * when we are changing its size, so no concurrent DB_FILL can
	 * be happening.
	 */
	/*
	 * XXX we should be doing a dbuf_read, checking the return
	 * value and returning that up to our callers
	 */
	dbuf_will_dirty(db, tx);

	/* create the data buffer for the new block */
	buf = arc_buf_alloc(db->db_dnode->dn_objset->os_spa, size, db, type);

	/* copy old block data to the new block */
	obuf = db->db_buf;
	bcopy(obuf->b_data, buf->b_data, MIN(osize, size));
	/* zero the remainder */
	if (size > osize)
		bzero((uint8_t *)buf->b_data + osize, size - osize);

	mutex_enter(&db->db_mtx);
	/* swap in the new buffer and drop our reference on the old one */
	dbuf_set_data(db, buf);
	VERIFY(arc_buf_remove_ref(obuf, db) == 1);
	db->db.db_size = size;

	/* keep this txg's dirty-data pointer in sync with the new buffer */
	if (db->db_level == 0)
		db->db_d.db_data_old[tx->tx_txg&TXG_MASK] = buf;
	mutex_exit(&db->db_mtx);

	dnode_willuse_space(db->db_dnode, size-osize, tx);
}
|
910 |
||
911 |
/*
 * Mark this dbuf dirty in the given transaction: snapshot its old data for
 * the syncing path, link it onto the dnode's per-txg dirty list, update
 * space accounting, and recursively dirty the parent indirect block.
 * Takes a hold on the dbuf on behalf of the txg (released at undirty/sync).
 */
void
dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	dnode_t *dn = db->db_dnode;
	objset_impl_t *os = dn->dn_objset;
	int drop_struct_lock = FALSE;
	int txgoff = tx->tx_txg & TXG_MASK;

	ASSERT(tx->tx_txg != 0);
	ASSERT(!refcount_is_zero(&db->db_holds));
	DMU_TX_DIRTY_BUF(tx, db);

	/*
	 * Shouldn't dirty a regular buffer in syncing context.  Private
	 * objects may be dirtied in syncing context, but only if they
	 * were already pre-dirtied in open context.
	 * XXX We may want to prohibit dirtying in syncing context even
	 * if they did pre-dirty.
	 */
	ASSERT(!(dmu_tx_is_syncing(tx) &&
	    !BP_IS_HOLE(&dn->dn_objset->os_rootbp) &&
	    dn->dn_object != DMU_META_DNODE_OBJECT &&
	    dn->dn_objset->os_dsl_dataset != NULL &&
	    !dsl_dir_is_private(
	    dn->dn_objset->os_dsl_dataset->ds_dir)));

	/*
	 * We make this assert for private objects as well, but after we
	 * check if we're already dirty.  They are allowed to re-dirty
	 * in syncing context.
	 */
	ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
	    dn->dn_dirtyctx == DN_UNDIRTIED ||
	    dn->dn_dirtyctx ==
	    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));

	mutex_enter(&db->db_mtx);
	/* XXX make this true for indirects too? */
	ASSERT(db->db_level != 0 || db->db_state == DB_CACHED ||
	    db->db_state == DB_FILL);

	/*
	 * If this buffer is currently part of an "overridden" region,
	 * we now need to remove it from that region.
	 */
	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
	    db->db_d.db_overridden_by[txgoff] != NULL) {
		dbuf_unoverride(db, tx->tx_txg);
	}

	mutex_enter(&dn->dn_mtx);
	/*
	 * Don't set dirtyctx to SYNC if we're just modifying this as we
	 * initialize the objset.
	 */
	if (dn->dn_dirtyctx == DN_UNDIRTIED &&
	    !BP_IS_HOLE(&dn->dn_objset->os_rootbp)) {
		dn->dn_dirtyctx =
		    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
		ASSERT(dn->dn_dirtyctx_firstset == NULL);
		/* token allocation records who dirtied first (debug aid) */
		dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
	}
	mutex_exit(&dn->dn_mtx);

	/*
	 * If this buffer is already dirty, we're done.
	 */
	if (list_link_active(&db->db_dirty_node[txgoff])) {
		/* re-dirtying: let the caller modify the frozen buffer */
		if (db->db_blkid != DB_BONUS_BLKID && db->db_level == 0 &&
		    db->db.db_object != DMU_META_DNODE_OBJECT)
			arc_buf_thaw(db->db_buf);

		mutex_exit(&db->db_mtx);
		return;
	}

	/*
	 * Only valid if not already dirty.
	 */
	ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
	    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));

	ASSERT3U(dn->dn_nlevels, >, db->db_level);
	ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
	    dn->dn_phys->dn_nlevels > db->db_level ||
	    dn->dn_next_nlevels[txgoff] > db->db_level ||
	    dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
	    dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);

	/*
	 * We should only be dirtying in syncing context if it's the
	 * mos, a spa os, or we're initializing the os.  However, we are
	 * allowed to dirty in syncing context provided we already
	 * dirtied it in open context.  Hence we must make this
	 * assertion only if we're not already dirty.
	 */
	ASSERT(!dmu_tx_is_syncing(tx) ||
	    os->os_dsl_dataset == NULL ||
	    !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
	    !BP_IS_HOLE(&os->os_rootbp));
	ASSERT(db->db.db_size != 0);

	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);

	/*
	 * If this buffer is dirty in an old transaction group we need
	 * to make a copy of it so that the changes we make in this
	 * transaction group won't leak out when we sync the older txg.
	 */
	if (db->db_blkid == DB_BONUS_BLKID) {
		ASSERT(db->db.db_data != NULL);
		ASSERT(db->db_d.db_data_old[txgoff] == NULL);
		dbuf_fix_old_bonus_data(db, tx->tx_txg);
		db->db_d.db_data_old[txgoff] = db->db.db_data;
	} else if (db->db_level == 0) {
		/*
		 * Release the data buffer from the cache so that we
		 * can modify it without impacting possible other users
		 * of this cached data block.  Note that indirect blocks
		 * and private objects are not released until the syncing
		 * state (since they are only modified then).
		 */
		ASSERT(db->db_buf != NULL);
		ASSERT(db->db_d.db_data_old[txgoff] == NULL);
		if (db->db.db_object != DMU_META_DNODE_OBJECT) {
			arc_release(db->db_buf, db);
			dbuf_fix_old_data(db, tx->tx_txg);
			ASSERT(db->db_buf != NULL);
		}
		db->db_d.db_data_old[txgoff] = db->db_buf;
	}

	mutex_enter(&dn->dn_mtx);
	/*
	 * We could have been freed_in_flight between the dbuf_noread
	 * and dbuf_dirty.  We win, as though the dbuf_noread() had
	 * happened after the free.
	 */
	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
		dnode_clear_range(dn, db->db_blkid, 1, tx);
		db->db_d.db_freed_in_flight = FALSE;
	}

	db->db_dirtied = tx->tx_txg;
	list_insert_tail(&dn->dn_dirty_dbufs[txgoff], db);
	mutex_exit(&dn->dn_mtx);

	if (db->db_blkid != DB_BONUS_BLKID) {
		/*
		 * Update the accounting.
		 */
		if (!dbuf_new_block(db) && db->db_blkptr) {
			/*
			 * This is only a guess -- if the dbuf is dirty
			 * in a previous txg, we don't know how much
			 * space it will use on disk yet.  We should
			 * really have the struct_rwlock to access
			 * db_blkptr, but since this is just a guess,
			 * it's OK if we get an odd answer.
			 */
			dnode_willuse_space(dn,
			    -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
		}
		dnode_willuse_space(dn, db->db.db_size, tx);
	}

	/*
	 * This buffer is now part of this txg
	 */
	dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg);
	db->db_dirtycnt += 1;
	ASSERT3U(db->db_dirtycnt, <=, 3);

	mutex_exit(&db->db_mtx);

	/* bonus buffers have no parent block to dirty */
	if (db->db_blkid == DB_BONUS_BLKID) {
		dnode_setdirty(dn, tx);
		return;
	}

	if (db->db_level == 0) {
		dnode_new_blkid(dn, db->db_blkid, tx);
		ASSERT(dn->dn_maxblkid >= db->db_blkid);
	}

	if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		drop_struct_lock = TRUE;
	}

	if (db->db_level+1 < dn->dn_nlevels) {
		/* recursively dirty the parent indirect block */
		int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
		dmu_buf_impl_t *parent;
		parent = dbuf_hold_level(dn, db->db_level+1,
		    db->db_blkid >> epbs, FTAG);
		if (drop_struct_lock)
			rw_exit(&dn->dn_struct_rwlock);
		dbuf_dirty(parent, tx);
		dbuf_rele(parent, FTAG);
	} else {
		if (drop_struct_lock)
			rw_exit(&dn->dn_struct_rwlock);
	}

	dnode_setdirty(dn, tx);
}
|
1117 |
||
1118 |
static int |
|
1119 |
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) |
|
1120 |
{ |
|
1121 |
dnode_t *dn = db->db_dnode; |
|
1122 |
int txgoff = tx->tx_txg & TXG_MASK; |
|
1544 | 1123 |
int64_t holds; |
789 | 1124 |
|
1125 |
ASSERT(tx->tx_txg != 0); |
|
1544 | 1126 |
ASSERT(db->db_blkid != DB_BONUS_BLKID); |
789 | 1127 |
|
1128 |
mutex_enter(&db->db_mtx); |
|
1129 |
||
1130 |
/* |
|
1131 |
* If this buffer is not dirty, we're done. |
|
1132 |
*/ |
|
1133 |
if (!list_link_active(&db->db_dirty_node[txgoff])) { |
|
1134 |
mutex_exit(&db->db_mtx); |
|
1135 |
return (0); |
|
1136 |
} |
|
1137 |
||
1138 |
/* |
|
1139 |
* If this buffer is currently held, we cannot undirty |
|
1140 |
* it, since one of the current holders may be in the |
|
1141 |
* middle of an update. Note that users of dbuf_undirty() |
|
1142 |
* should not place a hold on the dbuf before the call. |
|
1143 |
*/ |
|
1144 |
if (refcount_count(&db->db_holds) > db->db_dirtycnt) { |
|
1145 |
mutex_exit(&db->db_mtx); |
|
2688
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2417
diff
changeset
|
1146 |
/* Make sure we don't toss this buffer at sync phase */ |
789 | 1147 |
mutex_enter(&dn->dn_mtx); |
1148 |
dnode_clear_range(dn, db->db_blkid, 1, tx); |
|
1149 |
mutex_exit(&dn->dn_mtx); |
|
1150 |
return (0); |
|
1151 |
} |
|
1152 |
||
1153 |
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); |
|
1154 |
||
1155 |
dbuf_unoverride(db, tx->tx_txg); |
|
1156 |
||
1157 |
ASSERT(db->db.db_size != 0); |
|
1158 |
if (db->db_level == 0) { |
|
1159 |
ASSERT(db->db_buf != NULL); |
|
1160 |
ASSERT(db->db_d.db_data_old[txgoff] != NULL); |
|
1161 |
if (db->db_d.db_data_old[txgoff] != db->db_buf) |
|
1544 | 1162 |
VERIFY(arc_buf_remove_ref( |
1163 |
db->db_d.db_data_old[txgoff], db) == 1); |
|
789 | 1164 |
db->db_d.db_data_old[txgoff] = NULL; |
1165 |
} |
|
1166 |
||
1167 |
/* XXX would be nice to fix up dn_towrite_space[] */ |
|
1168 |
/* XXX undo db_dirtied? but how? */ |
|
1169 |
/* db->db_dirtied = tx->tx_txg; */ |
|
1170 |
||
1171 |
mutex_enter(&dn->dn_mtx); |
|
1172 |
list_remove(&dn->dn_dirty_dbufs[txgoff], db); |
|
1173 |
mutex_exit(&dn->dn_mtx); |
|
1174 |
||
1175 |
ASSERT(db->db_dirtycnt > 0); |
|
1176 |
db->db_dirtycnt -= 1; |
|
1177 |
||
1544 | 1178 |
if ((holds = refcount_remove(&db->db_holds, |
1179 |
(void *)(uintptr_t)tx->tx_txg)) == 0) { |
|
1180 |
arc_buf_t *buf = db->db_buf; |
|
789 | 1181 |
|
1544 | 1182 |
ASSERT(arc_released(buf)); |
1183 |
dbuf_set_data(db, NULL); |
|
1184 |
VERIFY(arc_buf_remove_ref(buf, db) == 1); |
|
789 | 1185 |
dbuf_evict(db); |
1186 |
return (1); |
|
1187 |
} |
|
1544 | 1188 |
ASSERT(holds > 0); |
789 | 1189 |
|
1190 |
mutex_exit(&db->db_mtx); |
|
1191 |
return (0); |
|
1192 |
} |
|
1193 |
||
1194 |
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
/*
 * Read in this dbuf (so its current contents are valid) and then mark it
 * dirty in the given transaction.  Exported to the DMU as
 * dmu_buf_will_dirty() via the weak alias above.
 */
void
dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	int rf = DB_RF_MUST_SUCCEED;

	ASSERT(tx->tx_txg != 0);
	ASSERT(!refcount_is_zero(&db->db_holds));

	/* tell dbuf_read() we already hold the struct lock, if we do */
	if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock))
		rf |= DB_RF_HAVESTRUCT;
	(void) dbuf_read(db, NULL, rf);
	dbuf_dirty(db, tx);
}
|
1208 |
||
1209 |
/*
 * Prepare this dbuf to be completely overwritten by the caller: skip
 * reading the old contents (dbuf_noread) and mark it dirty.  Only valid
 * for level-0, non-bonus buffers.
 */
void
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;

	ASSERT(db->db_blkid != DB_BONUS_BLKID);
	ASSERT(tx->tx_txg != 0);
	ASSERT(db->db_level == 0);
	ASSERT(!refcount_is_zero(&db->db_holds));

	/* meta-dnode buffers may only be filled by private (DMU) txs */
	ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
	    dmu_tx_private_ok(tx));

	dbuf_noread(db);
	dbuf_dirty(db, tx);
}
|
1225 |
||
1226 |
#pragma weak dmu_buf_fill_done = dbuf_fill_done
/*
 * Complete a fill started by dmu_buf_will_fill()/dbuf_noread(): transition
 * the dbuf from DB_FILL to DB_CACHED and wake any waiters.  If the block
 * was freed while the fill was in flight, zero the contents instead of
 * keeping the caller's data.
 */
/* ARGSUSED */
void
dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
	mutex_enter(&db->db_mtx);
	DBUF_VERIFY(db);

	if (db->db_state == DB_FILL) {
		if (db->db_level == 0 && db->db_d.db_freed_in_flight) {
			ASSERT(db->db_blkid != DB_BONUS_BLKID);
			/* we were freed while filling */
			/* XXX dbuf_undirty? */
			bzero(db->db.db_data, db->db.db_size);
			db->db_d.db_freed_in_flight = FALSE;
		}
		db->db_state = DB_CACHED;
		/* wake threads blocked in dbuf_read() et al. on db_changed */
		cv_broadcast(&db->db_changed);
	}
	mutex_exit(&db->db_mtx);
}
|
1247 |
||
1544 | 1248 |
/* |
1249 |
* "Clear" the contents of this dbuf. This will mark the dbuf |
|
1250 |
* EVICTING and clear *most* of its references. Unfortunetely, |
|
1251 |
* when we are not holding the dn_dbufs_mtx, we can't clear the |
|
1252 |
* entry in the dn_dbufs list. We have to wait until dbuf_destroy() |
|
1253 |
* in this case. For callers from the DMU we will usually see: |
|
1254 |
* dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() |
|
1255 |
* For the arc callback, we will usually see: |
|
1256 |
* dbuf_do_evict()->dbuf_clear();dbuf_destroy() |
|
1257 |
* Sometimes, though, we will get a mix of these two: |
|
1258 |
* DMU: dbuf_clear()->arc_buf_evict() |
|
1259 |
* ARC: dbuf_do_evict()->dbuf_destroy() |
|
1260 |
*/ |
|
1261 |
void |
|
789 | 1262 |
dbuf_clear(dmu_buf_impl_t *db) |
1263 |
{ |
|
1264 |
dnode_t *dn = db->db_dnode; |
|
1544 | 1265 |
dmu_buf_impl_t *parent = db->db_parent; |
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1266 |
dmu_buf_impl_t *dndb = dn->dn_dbuf; |
1544 | 1267 |
int dbuf_gone = FALSE; |
789 | 1268 |
|
1269 |
ASSERT(MUTEX_HELD(&db->db_mtx)); |
|
1270 |
ASSERT(refcount_is_zero(&db->db_holds)); |
|
1271 |
||
1544 | 1272 |
dbuf_evict_user(db); |
1273 |
||
789 | 1274 |
if (db->db_state == DB_CACHED) { |
1544 | 1275 |
ASSERT(db->db.db_data != NULL); |
1276 |
if (db->db_blkid == DB_BONUS_BLKID) |
|
1277 |
zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); |
|
789 | 1278 |
db->db.db_data = NULL; |
1279 |
db->db_state = DB_UNCACHED; |
|
1280 |
} |
|
1281 |
||
1282 |
ASSERT3U(db->db_state, ==, DB_UNCACHED); |
|
1283 |
ASSERT(db->db_data_pending == NULL); |
|
1284 |
||
1544 | 1285 |
db->db_state = DB_EVICTING; |
1286 |
db->db_blkptr = NULL; |
|
1287 |
||
1288 |
if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { |
|
1289 |
list_remove(&dn->dn_dbufs, db); |
|
1290 |
dnode_rele(dn, db); |
|
1291 |
} |
|
1292 |
||
1293 |
if (db->db_buf) |
|
1294 |
dbuf_gone = arc_buf_evict(db->db_buf); |
|
1295 |
||
1296 |
if (!dbuf_gone) |
|
1297 |
mutex_exit(&db->db_mtx); |
|
789 | 1298 |
|
1299 |
/* |
|
1300 |
* If this dbuf is referened from an indirect dbuf, |
|
1301 |
* decrement the ref count on the indirect dbuf. |
|
1302 |
*/ |
|
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1303 |
if (parent && parent != dndb) |
1544 | 1304 |
dbuf_rele(parent, db); |
789 | 1305 |
} |
1306 |
||
1307 |
/*
 * Locate the parent dbuf and block pointer for (level, blkid) within this
 * dnode.  On success returns 0 with *parentp holding a reference to the
 * parent dbuf (or NULL when the pointer lives in the dnode itself) and
 * *bpp pointing at the blkptr.  Returns ENOENT if no parent exists yet.
 * Caller must hold dn_struct_rwlock.
 */
static int
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
    dmu_buf_impl_t **parentp, blkptr_t **bpp)
{
	int nlevels, epbs;

	*parentp = NULL;
	*bpp = NULL;

	ASSERT(blkid != DB_BONUS_BLKID);

	/* an on-disk dnode with no levels yet still acts as one level */
	if (dn->dn_phys->dn_nlevels == 0)
		nlevels = 1;
	else
		nlevels = dn->dn_phys->dn_nlevels;

	/* entries (block pointers) per indirect block, as a shift */
	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;

	ASSERT3U(level * epbs, <, 64);
	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
	if (level >= nlevels ||
	    (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
		/* the buffer has no parent yet */
		return (ENOENT);
	} else if (level < nlevels-1) {
		/* this block is referenced from an indirect block */
		int err = dbuf_hold_impl(dn, level+1,
		    blkid >> epbs, fail_sparse, NULL, parentp);
		if (err)
			return (err);
		err = dbuf_read(*parentp, NULL,
		    (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL));
		if (err) {
			/* drop the parent hold on read failure */
			dbuf_rele(*parentp, NULL);
			*parentp = NULL;
			return (err);
		}
		/* index into the parent's array of block pointers */
		*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
		    (blkid & ((1ULL << epbs) - 1));
		return (0);
	} else {
		/* the block is referenced from the dnode */
		ASSERT3U(level, ==, nlevels-1);
		ASSERT(dn->dn_phys->dn_nblkptr == 0 ||
		    blkid < dn->dn_phys->dn_nblkptr);
		if (dn->dn_dbuf) {
			dbuf_add_ref(dn->dn_dbuf, NULL);
			*parentp = dn->dn_dbuf;
		}
		*bpp = &dn->dn_phys->dn_blkptr[blkid];
		return (0);
	}
}
|
1360 |
||
1361 |
/*
 * Allocate and initialize a new dbuf for (level, blkid) of this dnode,
 * insert it into the dbuf hash table and the dnode's dbuf list, and take
 * a hold on the dnode for it.  If another thread wins the hash-insert
 * race, the duplicate is freed and the existing dbuf is returned.
 * Caller must hold dn_struct_rwlock.
 */
static dmu_buf_impl_t *
dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
    dmu_buf_impl_t *parent, blkptr_t *blkptr)
{
	objset_impl_t *os = dn->dn_objset;
	dmu_buf_impl_t *db, *odb;

	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
	ASSERT(dn->dn_type != DMU_OT_NONE);

	db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);

	db->db_objset = os;
	db->db.db_object = dn->dn_object;
	db->db_level = level;
	db->db_blkid = blkid;
	db->db_dirtied = 0;
	db->db_dirtycnt = 0;
	db->db_dnode = dn;
	db->db_parent = parent;
	db->db_blkptr = blkptr;

	bzero(&db->db_d, sizeof (db->db_d));

	if (blkid == DB_BONUS_BLKID) {
		ASSERT3P(parent, ==, dn->dn_dbuf);
		db->db.db_size = dn->dn_bonuslen;
		db->db.db_offset = DB_BONUS_BLKID;
		db->db_state = DB_UNCACHED;
		/* the bonus dbuf is not placed in the hash table */
		return (db);
	} else {
		int blocksize =
		    db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz;
		db->db.db_size = blocksize;
		db->db.db_offset = db->db_blkid * blocksize;
	}

	/*
	 * Hold the dn_dbufs_mtx while we get the new dbuf
	 * in the hash table *and* added to the dbufs list.
	 * This prevents a possible deadlock with someone
	 * trying to look up this dbuf before its added to the
	 * dn_dbufs list.
	 */
	mutex_enter(&dn->dn_dbufs_mtx);
	/* EVICTING keeps lookups from using the dbuf until it's ready */
	db->db_state = DB_EVICTING;
	if ((odb = dbuf_hash_insert(db)) != NULL) {
		/* someone else inserted it first */
		kmem_cache_free(dbuf_cache, db);
		mutex_exit(&dn->dn_dbufs_mtx);
		return (odb);
	}
	list_insert_head(&dn->dn_dbufs, db);
	db->db_state = DB_UNCACHED;
	mutex_exit(&dn->dn_dbufs_mtx);

	if (parent && parent != dn->dn_dbuf)
		dbuf_add_ref(parent, db);

	ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
	    refcount_count(&dn->dn_holds) > 0);
	(void) refcount_add(&dn->dn_holds, db);

	dprintf_dbuf(db, "db=%p\n", db);

	return (db);
}
|
1429 |
||
1430 |
/*
 * ARC eviction callback for a dbuf's buffer.  If the dbuf has not already
 * been cleared (state != DB_EVICTING), evict it now; otherwise dbuf_clear()
 * already ran, so finish by destroying the dbuf.  Always returns 0.
 */
static int
dbuf_do_evict(void *private)
{
	arc_buf_t *buf = private;
	dmu_buf_impl_t *db = buf->b_private;

	/* may be called with db_mtx already held (from dbuf_clear path) */
	if (!MUTEX_HELD(&db->db_mtx))
		mutex_enter(&db->db_mtx);

	ASSERT(refcount_is_zero(&db->db_holds));

	if (db->db_state != DB_EVICTING) {
		ASSERT(db->db_state == DB_CACHED);
		DBUF_VERIFY(db);
		db->db_buf = NULL;
		dbuf_evict(db);
	} else {
		mutex_exit(&db->db_mtx);
		dbuf_destroy(db);
	}
	return (0);
}
1452 |
||
1453 |
/*
 * Final teardown of a dbuf: unlink it from the dnode's dbuf list and the
 * hash table (non-bonus buffers only), drop its dnode hold, and return it
 * to the dbuf kmem cache.  The dbuf must have no remaining holds.
 */
static void
dbuf_destroy(dmu_buf_impl_t *db)
{
	ASSERT(refcount_is_zero(&db->db_holds));

	if (db->db_blkid != DB_BONUS_BLKID) {
		dnode_t *dn = db->db_dnode;

		/*
		 * If this dbuf is still on the dn_dbufs list,
		 * remove it from that list.
		 */
		if (list_link_active(&db->db_link)) {
			mutex_enter(&dn->dn_dbufs_mtx);
			list_remove(&dn->dn_dbufs, db);
			mutex_exit(&dn->dn_dbufs_mtx);

			dnode_rele(dn, db);
		}
		dbuf_hash_remove(db);
	}
	db->db_parent = NULL;
	db->db_dnode = NULL;
	db->db_buf = NULL;

	ASSERT(db->db.db_data == NULL);
	ASSERT(db->db_hash_next == NULL);
	ASSERT(db->db_blkptr == NULL);
	ASSERT(db->db_data_pending == NULL);

	kmem_cache_free(dbuf_cache, db);
}
|
1485 |
||
1486 |
void |
|
1487 |
dbuf_prefetch(dnode_t *dn, uint64_t blkid) |
|
1488 |
{ |
|
2391 | 1489 |
dmu_buf_impl_t *db = NULL; |
789 | 1490 |
blkptr_t *bp = NULL; |
1491 |
||
1492 |
ASSERT(blkid != DB_BONUS_BLKID); |
|
1493 |
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); |
|
1494 |
||
1495 |
if (dnode_block_freed(dn, blkid)) |
|
1496 |
return; |
|
1497 |
||
1498 |
/* dbuf_find() returns with db_mtx held */ |
|
1499 |
if (db = dbuf_find(dn, 0, blkid)) { |
|
2391 | 1500 |
if (refcount_count(&db->db_holds) > 0) { |
1501 |
/* |
|
1502 |
* This dbuf is active. We assume that it is |
|
1503 |
* already CACHED, or else about to be either |
|
1504 |
* read or filled. |
|
1505 |
*/ |
|
1506 |
mutex_exit(&db->db_mtx); |
|
1507 |
return; |
|
1508 |
} |
|
789 | 1509 |
mutex_exit(&db->db_mtx); |
2417 | 1510 |
db = NULL; |
789 | 1511 |
} |
1512 |
||
2391 | 1513 |
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { |
789 | 1514 |
if (bp && !BP_IS_HOLE(bp)) { |
2391 | 1515 |
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; |
1544 | 1516 |
zbookmark_t zb; |
1517 |
zb.zb_objset = dn->dn_objset->os_dsl_dataset ? |
|
1518 |
dn->dn_objset->os_dsl_dataset->ds_object : 0; |
|
1519 |
zb.zb_object = dn->dn_object; |
|
1520 |
zb.zb_level = 0; |
|
1521 |
zb.zb_blkid = blkid; |
|
1522 |
||
789 | 1523 |
(void) arc_read(NULL, dn->dn_objset->os_spa, bp, |
1524 |
dmu_ot[dn->dn_type].ot_byteswap, |
|
1525 |
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, |
|
1526 |
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, |
|
2391 | 1527 |
&aflags, &zb); |
789 | 1528 |
} |
2391 | 1529 |
if (db) |
1530 |
dbuf_rele(db, NULL); |
|
789 | 1531 |
} |
1532 |
} |
|
1533 |
||
1534 |
/* |
|
1535 |
* Returns with db_holds incremented, and db_mtx not held. |
|
1536 |
* Note: dn_struct_rwlock must be held. |
|
1537 |
*/ |
|
1538 |
int |
|
1539 |
dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, |
|
1540 |
void *tag, dmu_buf_impl_t **dbp) |
|
1541 |
{ |
|
1542 |
dmu_buf_impl_t *db, *parent = NULL; |
|
1543 |
||
1544 | 1544 |
ASSERT(blkid != DB_BONUS_BLKID); |
789 | 1545 |
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); |
1546 |
ASSERT3U(dn->dn_nlevels, >, level); |
|
1547 |
||
1548 |
*dbp = NULL; |
|
1544 | 1549 |
top: |
789 | 1550 |
/* dbuf_find() returns with db_mtx held */ |
1551 |
db = dbuf_find(dn, level, blkid); |
|
1552 |
||
1553 |
if (db == NULL) { |
|
1554 |
blkptr_t *bp = NULL; |
|
1555 |
int err; |
|
1556 |
||
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1557 |
ASSERT3P(parent, ==, NULL); |
789 | 1558 |
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); |
1559 |
if (fail_sparse) { |
|
1560 |
if (err == 0 && bp && BP_IS_HOLE(bp)) |
|
1561 |
err = ENOENT; |
|
1562 |
if (err) { |
|
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1563 |
if (parent) |
1544 | 1564 |
dbuf_rele(parent, NULL); |
789 | 1565 |
return (err); |
1566 |
} |
|
1567 |
} |
|
1544 | 1568 |
if (err && err != ENOENT) |
1569 |
return (err); |
|
789 | 1570 |
db = dbuf_create(dn, level, blkid, parent, bp); |
1571 |
} |
|
1572 |
||
1544 | 1573 |
if (db->db_buf && refcount_is_zero(&db->db_holds)) { |
1574 |
arc_buf_add_ref(db->db_buf, db); |
|
1575 |
if (db->db_buf->b_data == NULL) { |
|
1576 |
dbuf_clear(db); |
|
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1577 |
if (parent) { |
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1578 |
dbuf_rele(parent, NULL); |
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1579 |
parent = NULL; |
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1580 |
} |
1544 | 1581 |
goto top; |
1582 |
} |
|
1583 |
ASSERT3P(db->db.db_data, ==, db->db_buf->b_data); |
|
1584 |
} |
|
1585 |
||
1586 |
ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf)); |
|
1587 |
||
789 | 1588 |
/* |
1589 |
* If this buffer is currently syncing out, and we are |
|
1590 |
* are still referencing it from db_data, we need to make |
|
1591 |
* a copy of it in case we decide we want to dirty it |
|
1592 |
* again in this txg. |
|
1593 |
*/ |
|
1594 |
if (db->db_level == 0 && db->db_state == DB_CACHED && |
|
1544 | 1595 |
dn->dn_object != DMU_META_DNODE_OBJECT && |
789 | 1596 |
db->db_data_pending == db->db_buf) { |
1597 |
int size = (db->db_blkid == DB_BONUS_BLKID) ? |
|
1598 |
DN_MAX_BONUSLEN : db->db.db_size; |
|
3290 | 1599 |
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); |
789 | 1600 |
|
1601 |
dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa, |
|
3290 | 1602 |
size, db, type)); |
789 | 1603 |
bcopy(db->db_data_pending->b_data, db->db.db_data, |
1604 |
db->db.db_size); |
|
1605 |
} |
|
1606 |
||
1544 | 1607 |
(void) refcount_add(&db->db_holds, tag); |
789 | 1608 |
dbuf_update_data(db); |
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
1609 |
DBUF_VERIFY(db); |
789 | 1610 |
mutex_exit(&db->db_mtx); |
1611 |
||
1612 |
/* NOTE: we can't rele the parent until after we drop the db_mtx */ |
|
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
1613 |
if (parent) |
1544 | 1614 |
dbuf_rele(parent, NULL); |
789 | 1615 |
|
1616 |
ASSERT3P(db->db_dnode, ==, dn); |
|
1617 |
ASSERT3U(db->db_blkid, ==, blkid); |
|
1618 |
ASSERT3U(db->db_level, ==, level); |
|
1619 |
*dbp = db; |
|
1620 |
||
1621 |
return (0); |
|
1622 |
} |
|
1623 |
||
1624 |
dmu_buf_impl_t * |
|
1544 | 1625 |
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) |
789 | 1626 |
{ |
1627 |
dmu_buf_impl_t *db; |
|
1544 | 1628 |
int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db); |
1629 |
return (err ? NULL : db); |
|
789 | 1630 |
} |
1631 |
||
1632 |
dmu_buf_impl_t * |
|
1633 |
dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) |
|
1634 |
{ |
|
1635 |
dmu_buf_impl_t *db; |
|
1544 | 1636 |
int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); |
1637 |
return (err ? NULL : db); |
|
789 | 1638 |
} |
1639 |
||
1640 |
dmu_buf_impl_t * |
|
1544 | 1641 |
dbuf_create_bonus(dnode_t *dn) |
789 | 1642 |
{ |
1544 | 1643 |
dmu_buf_impl_t *db = dn->dn_bonus; |
1644 |
||
1645 |
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); |
|
1646 |
||
1647 |
ASSERT(dn->dn_bonus == NULL); |
|
1648 |
db = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL); |
|
789 | 1649 |
return (db); |
1650 |
} |
|
1651 |
||
1544 | 1652 |
#pragma weak dmu_buf_add_ref = dbuf_add_ref |
789 | 1653 |
void |
1654 |
dbuf_add_ref(dmu_buf_impl_t *db, void *tag) |
|
1655 |
{ |
|
1544 | 1656 |
int64_t holds = refcount_add(&db->db_holds, tag); |
1657 |
ASSERT(holds > 1); |
|
789 | 1658 |
} |
1659 |
||
1544 | 1660 |
#pragma weak dmu_buf_rele = dbuf_rele |
789 | 1661 |
void |
1544 | 1662 |
dbuf_rele(dmu_buf_impl_t *db, void *tag) |
789 | 1663 |
{ |
1664 |
int64_t holds; |
|
1665 |
||
1666 |
mutex_enter(&db->db_mtx); |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
1667 |
DBUF_VERIFY(db); |
789 | 1668 |
|
1669 |
holds = refcount_remove(&db->db_holds, tag); |
|
1544 | 1670 |
ASSERT(holds >= 0); |
1671 |
||
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1672 |
if (db->db_buf && holds == db->db_dirtycnt) |
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1673 |
arc_buf_freeze(db->db_buf); |
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1674 |
|
1544 | 1675 |
if (holds == db->db_dirtycnt && |
1676 |
db->db_level == 0 && db->db_d.db_immediate_evict) |
|
1677 |
dbuf_evict_user(db); |
|
789 | 1678 |
|
1679 |
if (holds == 0) { |
|
1544 | 1680 |
if (db->db_blkid == DB_BONUS_BLKID) { |
1681 |
mutex_exit(&db->db_mtx); |
|
1682 |
dnode_rele(db->db_dnode, db); |
|
1683 |
} else if (db->db_buf == NULL) { |
|
1684 |
/* |
|
1685 |
* This is a special case: we never associated this |
|
1686 |
* dbuf with any data allocated from the ARC. |
|
1687 |
*/ |
|
1688 |
ASSERT3U(db->db_state, ==, DB_UNCACHED); |
|
1689 |
dbuf_evict(db); |
|
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1690 |
} else if (arc_released(db->db_buf)) { |
1544 | 1691 |
arc_buf_t *buf = db->db_buf; |
1692 |
/* |
|
1693 |
* This dbuf has anonymous data associated with it. |
|
1694 |
*/ |
|
1695 |
dbuf_set_data(db, NULL); |
|
1696 |
VERIFY(arc_buf_remove_ref(buf, db) == 1); |
|
1697 |
dbuf_evict(db); |
|
1698 |
} else { |
|
1699 |
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); |
|
1700 |
mutex_exit(&db->db_mtx); |
|
1701 |
} |
|
789 | 1702 |
} else { |
1703 |
mutex_exit(&db->db_mtx); |
|
1704 |
} |
|
1705 |
} |
|
1706 |
||
1707 |
#pragma weak dmu_buf_refcount = dbuf_refcount |
|
1708 |
uint64_t |
|
1709 |
dbuf_refcount(dmu_buf_impl_t *db) |
|
1710 |
{ |
|
1711 |
return (refcount_count(&db->db_holds)); |
|
1712 |
} |
|
1713 |
||
1714 |
void * |
|
1715 |
dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, |
|
1716 |
dmu_buf_evict_func_t *evict_func) |
|
1717 |
{ |
|
1718 |
return (dmu_buf_update_user(db_fake, NULL, user_ptr, |
|
1719 |
user_data_ptr_ptr, evict_func)); |
|
1720 |
} |
|
1721 |
||
1722 |
void * |
|
1723 |
dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, |
|
1724 |
dmu_buf_evict_func_t *evict_func) |
|
1725 |
{ |
|
1726 |
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; |
|
1727 |
||
1728 |
db->db_d.db_immediate_evict = TRUE; |
|
1729 |
return (dmu_buf_update_user(db_fake, NULL, user_ptr, |
|
1730 |
user_data_ptr_ptr, evict_func)); |
|
1731 |
} |
|
1732 |
||
1733 |
void * |
|
1734 |
dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, |
|
1735 |
void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func) |
|
1736 |
{ |
|
1737 |
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; |
|
1738 |
ASSERT(db->db_level == 0); |
|
1739 |
||
1740 |
ASSERT((user_ptr == NULL) == (evict_func == NULL)); |
|
1741 |
||
1742 |
mutex_enter(&db->db_mtx); |
|
1743 |
||
1744 |
if (db->db_d.db_user_ptr == old_user_ptr) { |
|
1745 |
db->db_d.db_user_ptr = user_ptr; |
|
1746 |
db->db_d.db_user_data_ptr_ptr = user_data_ptr_ptr; |
|
1747 |
db->db_d.db_evict_func = evict_func; |
|
1748 |
||
1749 |
dbuf_update_data(db); |
|
1750 |
} else { |
|
1751 |
old_user_ptr = db->db_d.db_user_ptr; |
|
1752 |
} |
|
1753 |
||
1754 |
mutex_exit(&db->db_mtx); |
|
1755 |
return (old_user_ptr); |
|
1756 |
} |
|
1757 |
||
1758 |
void * |
|
1759 |
dmu_buf_get_user(dmu_buf_t *db_fake) |
|
1760 |
{ |
|
1761 |
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; |
|
1762 |
ASSERT(!refcount_is_zero(&db->db_holds)); |
|
1763 |
||
1764 |
return (db->db_d.db_user_ptr); |
|
1765 |
} |
|
1766 |
||
1767 |
void |
|
1768 |
dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx) |
|
1769 |
{ |
|
1770 |
arc_buf_t **data; |
|
1771 |
uint64_t txg = tx->tx_txg; |
|
1772 |
dnode_t *dn = db->db_dnode; |
|
1773 |
objset_impl_t *os = dn->dn_objset; |
|
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1774 |
int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; |
1544 | 1775 |
int checksum, compress; |
1776 |
zbookmark_t zb; |
|
789 | 1777 |
int blksz; |
3290 | 1778 |
arc_buf_contents_t type; |
789 | 1779 |
|
1780 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
1781 |
||
1782 |
dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); |
|
1783 |
||
1784 |
mutex_enter(&db->db_mtx); |
|
1785 |
/* |
|
1786 |
* To be synced, we must be dirtied. But we |
|
1787 |
* might have been freed after the dirty. |
|
1788 |
*/ |
|
1789 |
if (db->db_state == DB_UNCACHED) { |
|
1790 |
/* This buffer has been freed since it was dirtied */ |
|
1791 |
ASSERT(db->db.db_data == NULL); |
|
1792 |
} else if (db->db_state == DB_FILL) { |
|
1793 |
/* This buffer was freed and is now being re-filled */ |
|
1794 |
ASSERT(db->db.db_data != db->db_d.db_data_old[txg&TXG_MASK]); |
|
1795 |
} else { |
|
1796 |
ASSERT3U(db->db_state, ==, DB_CACHED); |
|
1797 |
} |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
1798 |
DBUF_VERIFY(db); |
789 | 1799 |
|
1800 |
/* |
|
1801 |
* Don't need a lock on db_dirty (dn_mtx), because it can't |
|
1802 |
* be modified yet. |
|
1803 |
*/ |
|
1804 |
||
1544 | 1805 |
if (db->db_blkid == DB_BONUS_BLKID) { |
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1806 |
arc_buf_t **datap = &db->db_d.db_data_old[txg&TXG_MASK]; |
1544 | 1807 |
/* |
1808 |
* Simply copy the bonus data into the dnode. It will |
|
1809 |
* be written out when the dnode is synced (and it will |
|
1810 |
* be synced, since it must have been dirty for dbuf_sync |
|
1811 |
* to be called). |
|
1812 |
*/ |
|
1813 |
/* |
|
1814 |
* Use dn_phys->dn_bonuslen since db.db_size is the length |
|
1815 |
* of the bonus buffer in the open transaction rather than |
|
1816 |
* the syncing transaction. |
|
1817 |
*/ |
|
1818 |
ASSERT(*datap != NULL); |
|
1819 |
ASSERT3U(db->db_level, ==, 0); |
|
1820 |
ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); |
|
1821 |
bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); |
|
1822 |
if (*datap != db->db.db_data) |
|
1823 |
zio_buf_free(*datap, DN_MAX_BONUSLEN); |
|
1824 |
db->db_d.db_data_old[txg&TXG_MASK] = NULL; |
|
1825 |
db->db_data_pending = NULL; |
|
1826 |
if (db->db_dirtied == txg) |
|
1827 |
db->db_dirtied = 0; |
|
1828 |
ASSERT(db->db_dirtycnt > 0); |
|
1829 |
db->db_dirtycnt -= 1; |
|
1830 |
mutex_exit(&db->db_mtx); |
|
1831 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
|
1832 |
return; |
|
1833 |
} |
|
1834 |
||
789 | 1835 |
if (db->db_level == 0) { |
3290 | 1836 |
type = DBUF_GET_BUFC_TYPE(db); |
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1837 |
data = &db->db_d.db_data_old[txg&TXG_MASK]; |
789 | 1838 |
blksz = arc_buf_size(*data); |
2237 | 1839 |
|
1840 |
/* |
|
1841 |
* This buffer is in the middle of an immdiate write. |
|
1842 |
* Wait for the synchronous IO to complete. |
|
1843 |
*/ |
|
1844 |
while (db->db_d.db_overridden_by[txg&TXG_MASK] == IN_DMU_SYNC) { |
|
1845 |
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); |
|
1846 |
cv_wait(&db->db_changed, &db->db_mtx); |
|
1847 |
ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK]); |
|
1848 |
} |
|
789 | 1849 |
/* |
1850 |
* If this buffer is currently "in use" (i.e., there are |
|
1851 |
* active holds and db_data still references it), then make |
|
1852 |
* a copy before we start the write so that any modifications |
|
1853 |
* from the open txg will not leak into this write. |
|
1854 |
* |
|
1855 |
* NOTE: this copy does not need to be made for objects only |
|
1856 |
* modified in the syncing context (e.g. DNONE_DNODE blocks) |
|
1857 |
* or if there is no actual write involved (bonus blocks). |
|
1858 |
*/ |
|
1544 | 1859 |
if (dn->dn_object != DMU_META_DNODE_OBJECT && |
1860 |
db->db_d.db_overridden_by[txg&TXG_MASK] == NULL) { |
|
789 | 1861 |
if (refcount_count(&db->db_holds) > 1 && |
1862 |
*data == db->db_buf) { |
|
3290 | 1863 |
*data = arc_buf_alloc(os->os_spa, blksz, db, |
1864 |
type); |
|
789 | 1865 |
bcopy(db->db.db_data, (*data)->b_data, blksz); |
1866 |
} |
|
1867 |
db->db_data_pending = *data; |
|
1544 | 1868 |
} else if (dn->dn_object == DMU_META_DNODE_OBJECT) { |
789 | 1869 |
/* |
1870 |
* Private object buffers are released here rather |
|
1871 |
* than in dbuf_dirty() since they are only modified |
|
1872 |
* in the syncing context and we don't want the |
|
1873 |
* overhead of making multiple copies of the data. |
|
1874 |
*/ |
|
1875 |
arc_release(db->db_buf, db); |
|
1876 |
} |
|
1877 |
} else { |
|
1878 |
data = &db->db_buf; |
|
1879 |
if (*data == NULL) { |
|
1880 |
/* |
|
1881 |
* This can happen if we dirty and then free |
|
1882 |
* the level-0 data blocks in the same txg. So |
|
1883 |
* this indirect remains unchanged. |
|
1884 |
*/ |
|
1885 |
if (db->db_dirtied == txg) |
|
1886 |
db->db_dirtied = 0; |
|
1887 |
ASSERT(db->db_dirtycnt > 0); |
|
1888 |
db->db_dirtycnt -= 1; |
|
1889 |
mutex_exit(&db->db_mtx); |
|
1544 | 1890 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
789 | 1891 |
return; |
1892 |
} |
|
1893 |
blksz = db->db.db_size; |
|
1894 |
ASSERT3U(blksz, ==, 1<<dn->dn_phys->dn_indblkshift); |
|
1895 |
} |
|
1896 |
||
1897 |
ASSERT(*data != NULL); |
|
1898 |
||
1544 | 1899 |
if (db->db_level > 0 && !arc_released(db->db_buf)) { |
789 | 1900 |
/* |
1901 |
* This indirect buffer was marked dirty, but |
|
1902 |
* never modified (if it had been modified, then |
|
1903 |
* we would have released the buffer). There is |
|
1904 |
* no reason to write anything. |
|
1905 |
*/ |
|
1906 |
db->db_data_pending = NULL; |
|
1907 |
if (db->db_dirtied == txg) |
|
1908 |
db->db_dirtied = 0; |
|
1909 |
ASSERT(db->db_dirtycnt > 0); |
|
1910 |
db->db_dirtycnt -= 1; |
|
1911 |
mutex_exit(&db->db_mtx); |
|
1544 | 1912 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
789 | 1913 |
return; |
1914 |
} else if (db->db_blkptr == NULL && |
|
1915 |
db->db_level == dn->dn_phys->dn_nlevels-1 && |
|
1916 |
db->db_blkid < dn->dn_phys->dn_nblkptr) { |
|
1917 |
/* |
|
1918 |
* This buffer was allocated at a time when there was |
|
1919 |
* no available blkptrs from the dnode, or it was |
|
1920 |
* inappropriate to hook it in (i.e., nlevels mis-match). |
|
1921 |
*/ |
|
1922 |
ASSERT(db->db_blkptr == NULL); |
|
1923 |
ASSERT(db->db_parent == NULL); |
|
1924 |
db->db_parent = dn->dn_dbuf; |
|
1925 |
db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
1926 |
DBUF_VERIFY(db); |
789 | 1927 |
mutex_exit(&db->db_mtx); |
1928 |
} else if (db->db_blkptr == NULL) { |
|
1929 |
dmu_buf_impl_t *parent = db->db_parent; |
|
1930 |
||
1931 |
mutex_exit(&db->db_mtx); |
|
1932 |
ASSERT(dn->dn_phys->dn_nlevels > 1); |
|
1933 |
if (parent == NULL) { |
|
1934 |
rw_enter(&dn->dn_struct_rwlock, RW_READER); |
|
1935 |
(void) dbuf_hold_impl(dn, db->db_level+1, |
|
1544 | 1936 |
db->db_blkid >> epbs, FALSE, FTAG, &parent); |
789 | 1937 |
rw_exit(&dn->dn_struct_rwlock); |
1938 |
dbuf_add_ref(parent, db); |
|
1939 |
db->db_parent = parent; |
|
1544 | 1940 |
dbuf_rele(parent, FTAG); |
789 | 1941 |
} |
1544 | 1942 |
(void) dbuf_read(parent, NULL, DB_RF_MUST_SUCCEED); |
789 | 1943 |
} else { |
1944 |
mutex_exit(&db->db_mtx); |
|
1945 |
} |
|
1946 |
||
1544 | 1947 |
ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || db->db_parent != NULL); |
789 | 1948 |
|
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1949 |
if (db->db_level > 0 && |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1950 |
db->db_blkid > dn->dn_phys->dn_maxblkid >> (db->db_level * epbs)) { |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1951 |
/* |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1952 |
* Don't write indirect blocks past EOF. |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1953 |
* We get these when we truncate a file *after* dirtying |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1954 |
* blocks in the truncate range (we undirty the level 0 |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1955 |
* blocks in dbuf_free_range(), but not the indirects). |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1956 |
*/ |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1957 |
#ifdef ZFS_DEBUG |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1958 |
/* |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1959 |
* Verify that this indirect block is empty. |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1960 |
*/ |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1961 |
blkptr_t *bplist; |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1962 |
int i; |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1963 |
|
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1964 |
mutex_enter(&db->db_mtx); |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1965 |
bplist = db->db.db_data; |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1966 |
for (i = 0; i < (1 << epbs); i++) { |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1967 |
if (!BP_IS_HOLE(&bplist[i])) { |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1968 |
panic("data past EOF: " |
1199 | 1969 |
"db=%p level=%d id=%llu i=%d\n", |
1970 |
db, db->db_level, |
|
1971 |
(u_longlong_t)db->db_blkid, i); |
|
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1972 |
} |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1973 |
} |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1974 |
mutex_exit(&db->db_mtx); |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1975 |
#endif |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1976 |
ASSERT(db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)); |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1977 |
mutex_enter(&db->db_mtx); |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1978 |
db->db_dirtycnt -= 1; |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1979 |
mutex_exit(&db->db_mtx); |
1544 | 1980 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1981 |
return; |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1982 |
} |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
1983 |
|
789 | 1984 |
if (db->db_parent != dn->dn_dbuf) { |
1985 |
dmu_buf_impl_t *parent = db->db_parent; |
|
1986 |
||
1987 |
mutex_enter(&db->db_mtx); |
|
1988 |
ASSERT(db->db_level == parent->db_level-1); |
|
1989 |
ASSERT(list_link_active(&parent->db_dirty_node[txg&TXG_MASK])); |
|
1990 |
/* |
|
1544 | 1991 |
* We may have read this indirect block after we dirtied it, |
789 | 1992 |
* so never released it from the cache. |
1993 |
*/ |
|
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
1994 |
arc_release(parent->db_buf, parent); |
789 | 1995 |
|
1996 |
db->db_blkptr = (blkptr_t *)parent->db.db_data + |
|
1997 |
(db->db_blkid & ((1ULL << epbs) - 1)); |
|
873
adefbfa5f42d
6347448 non ZFS_DEBUG kernels shouldn't call empty verify functions
ek110237
parents:
789
diff
changeset
|
1998 |
DBUF_VERIFY(db); |
789 | 1999 |
mutex_exit(&db->db_mtx); |
2000 |
#ifdef ZFS_DEBUG |
|
1544 | 2001 |
} else { |
789 | 2002 |
/* |
2003 |
* We don't need to dnode_setdirty(dn) because if we got |
|
2004 |
* here then the parent is already dirty. |
|
2005 |
*/ |
|
2006 |
ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1); |
|
2007 |
ASSERT3P(db->db_blkptr, ==, |
|
2008 |
&dn->dn_phys->dn_blkptr[db->db_blkid]); |
|
1544 | 2009 |
#endif |
789 | 2010 |
} |
1544 | 2011 |
ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); |
2012 |
||
789 | 2013 |
if (db->db_level == 0 && |
2014 |
db->db_d.db_overridden_by[txg&TXG_MASK] != NULL) { |
|
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
2015 |
arc_buf_t **old = &db->db_d.db_data_old[txg&TXG_MASK]; |
789 | 2016 |
blkptr_t **bpp = &db->db_d.db_overridden_by[txg&TXG_MASK]; |
2082 | 2017 |
int old_size = bp_get_dasize(os->os_spa, db->db_blkptr); |
2018 |
int new_size = bp_get_dasize(os->os_spa, *bpp); |
|
789 | 2019 |
|
2020 |
ASSERT(db->db_blkid != DB_BONUS_BLKID); |
|
2021 |
||
2022 |
dnode_diduse_space(dn, new_size-old_size); |
|
2023 |
mutex_enter(&dn->dn_mtx); |
|
2024 |
if (db->db_blkid > dn->dn_phys->dn_maxblkid) |
|
2025 |
dn->dn_phys->dn_maxblkid = db->db_blkid; |
|
2026 |
mutex_exit(&dn->dn_mtx); |
|
2027 |
||
2028 |
dsl_dataset_block_born(os->os_dsl_dataset, *bpp, tx); |
|
2029 |
if (!BP_IS_HOLE(db->db_blkptr)) |
|
2030 |
dsl_dataset_block_kill(os->os_dsl_dataset, |
|
2031 |
db->db_blkptr, os->os_synctx); |
|
2032 |
||
2033 |
mutex_enter(&db->db_mtx); |
|
2034 |
*db->db_blkptr = **bpp; |
|
2035 |
kmem_free(*bpp, sizeof (blkptr_t)); |
|
2036 |
*bpp = NULL; |
|
2037 |
||
2038 |
if (*old != db->db_buf) |
|
1544 | 2039 |
VERIFY(arc_buf_remove_ref(*old, db) == 1); |
2040 |
else if (!BP_IS_HOLE(db->db_blkptr)) |
|
2041 |
arc_set_callback(db->db_buf, dbuf_do_evict, db); |
|
2042 |
else |
|
2043 |
ASSERT(arc_released(db->db_buf)); |
|
789 | 2044 |
*old = NULL; |
2045 |
db->db_data_pending = NULL; |
|
2046 |
||
2047 |
cv_broadcast(&db->db_changed); |
|
2048 |
||
2049 |
ASSERT(db->db_dirtycnt > 0); |
|
2050 |
db->db_dirtycnt -= 1; |
|
2051 |
mutex_exit(&db->db_mtx); |
|
1544 | 2052 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
2053 |
return; |
|
2054 |
} |
|
789 | 2055 |
|
1544 | 2056 |
if (db->db_level > 0) { |
2057 |
/* |
|
2058 |
* XXX -- we should design a compression algorithm |
|
2059 |
* that specializes in arrays of bps. |
|
2060 |
*/ |
|
2061 |
checksum = ZIO_CHECKSUM_FLETCHER_4; |
|
2986 | 2062 |
if (zfs_mdcomp_disable) |
2063 |
compress = ZIO_COMPRESS_EMPTY; |
|
2064 |
else |
|
2065 |
compress = ZIO_COMPRESS_LZJB; |
|
1544 | 2066 |
} else { |
2067 |
/* |
|
2068 |
* Allow dnode settings to override objset settings, |
|
2069 |
* except for metadata checksums. |
|
2070 |
*/ |
|
2071 |
if (dmu_ot[dn->dn_type].ot_metadata) { |
|
2072 |
checksum = os->os_md_checksum; |
|
2073 |
compress = zio_compress_select(dn->dn_compress, |
|
2074 |
os->os_md_compress); |
|
789 | 2075 |
} else { |
1544 | 2076 |
checksum = zio_checksum_select(dn->dn_checksum, |
2077 |
os->os_checksum); |
|
2078 |
compress = zio_compress_select(dn->dn_compress, |
|
2079 |
os->os_compress); |
|
789 | 2080 |
} |
1544 | 2081 |
} |
789 | 2082 |
#ifdef ZFS_DEBUG |
1544 | 2083 |
if (db->db_parent) { |
2084 |
ASSERT(list_link_active( |
|
2085 |
&db->db_parent->db_dirty_node[txg&TXG_MASK])); |
|
2086 |
ASSERT(db->db_parent == dn->dn_dbuf || |
|
2087 |
db->db_parent->db_level > 0); |
|
2088 |
if (dn->dn_object == DMU_META_DNODE_OBJECT || db->db_level > 0) |
|
2089 |
ASSERT(*data == db->db_buf); |
|
2090 |
} |
|
789 | 2091 |
#endif |
1544 | 2092 |
ASSERT3U(db->db_blkptr->blk_birth, <=, tx->tx_txg); |
2093 |
zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; |
|
2094 |
zb.zb_object = db->db.db_object; |
|
2095 |
zb.zb_level = db->db_level; |
|
2096 |
zb.zb_blkid = db->db_blkid; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1596
diff
changeset
|
2097 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1596
diff
changeset
|
2098 |
(void) arc_write(zio, os->os_spa, checksum, compress, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1596
diff
changeset
|
2099 |
dmu_get_replication_level(os->os_spa, &zb, dn->dn_type), txg, |
1544 | 2100 |
db->db_blkptr, *data, dbuf_write_done, db, |
2101 |
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_NOWAIT, &zb); |
|
2102 |
/* |
|
2103 |
* We can't access db after arc_write, since it could finish |
|
2104 |
* and be freed, and we have no locks on it. |
|
2105 |
*/ |
|
789 | 2106 |
} |
2107 |
||
2108 |
struct dbuf_arg { |
|
2109 |
objset_impl_t *os; |
|
2110 |
blkptr_t bp; |
|
2111 |
}; |
|
2112 |
||
2113 |
static void |
|
2114 |
dbuf_do_born(void *arg) |
|
2115 |
{ |
|
2116 |
struct dbuf_arg *da = arg; |
|
2117 |
dsl_dataset_block_born(da->os->os_dsl_dataset, |
|
2118 |
&da->bp, da->os->os_synctx); |
|
2119 |
kmem_free(da, sizeof (struct dbuf_arg)); |
|
2120 |
} |
|
2121 |
||
2122 |
static void |
|
2123 |
dbuf_do_kill(void *arg) |
|
2124 |
{ |
|
2125 |
struct dbuf_arg *da = arg; |
|
2126 |
dsl_dataset_block_kill(da->os->os_dsl_dataset, |
|
2127 |
&da->bp, da->os->os_synctx); |
|
2128 |
kmem_free(da, sizeof (struct dbuf_arg)); |
|
2129 |
} |
|
2130 |
||
2131 |
/* ARGSUSED */ |
|
2132 |
static void |
|
2133 |
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) |
|
2134 |
{ |
|
2135 |
dmu_buf_impl_t *db = vdb; |
|
2136 |
dnode_t *dn = db->db_dnode; |
|
2137 |
objset_impl_t *os = dn->dn_objset; |
|
2138 |
uint64_t txg = zio->io_txg; |
|
2139 |
uint64_t fill = 0; |
|
2140 |
int i; |
|
2141 |
int old_size, new_size; |
|
2142 |
||
2143 |
ASSERT3U(zio->io_error, ==, 0); |
|
2144 |
||
2145 |
dprintf_dbuf_bp(db, &zio->io_bp_orig, "bp_orig: %s", ""); |
|
2146 |
||
2082 | 2147 |
old_size = bp_get_dasize(os->os_spa, &zio->io_bp_orig); |
2148 |
new_size = bp_get_dasize(os->os_spa, zio->io_bp); |
|
789 | 2149 |
|
2150 |
dnode_diduse_space(dn, new_size-old_size); |
|
2151 |
||
2152 |
mutex_enter(&db->db_mtx); |
|
2153 |
||
2237 | 2154 |
ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == NULL); |
2155 |
||
789 | 2156 |
if (db->db_dirtied == txg) |
2157 |
db->db_dirtied = 0; |
|
2158 |
||
2159 |
if (db->db_level == 0) { |
|
3093
71525e4187d5
6490569 verify arc bufs are not modified when they shouldn't be
ahrens
parents:
2986
diff
changeset
|
2160 |
arc_buf_t **old = &db->db_d.db_data_old[txg&TXG_MASK]; |
789 | 2161 |
|
2162 |
ASSERT(db->db_blkid != DB_BONUS_BLKID); |
|
2163 |
||
2164 |
if (*old != db->db_buf) |
|
1544 | 2165 |
VERIFY(arc_buf_remove_ref(*old, db) == 1); |
2166 |
else if (!BP_IS_HOLE(db->db_blkptr)) |
|
2167 |
arc_set_callback(db->db_buf, dbuf_do_evict, db); |
|
2168 |
else |
|
2169 |
ASSERT(arc_released(db->db_buf)); |
|
789 | 2170 |
*old = NULL; |
2171 |
db->db_data_pending = NULL; |
|
2172 |
||
2173 |
mutex_enter(&dn->dn_mtx); |
|
2174 |
if (db->db_blkid > dn->dn_phys->dn_maxblkid && |
|
2175 |
!BP_IS_HOLE(db->db_blkptr)) |
|
2176 |
dn->dn_phys->dn_maxblkid = db->db_blkid; |
|
2177 |
mutex_exit(&dn->dn_mtx); |
|
2178 |
||
2179 |
if (dn->dn_type == DMU_OT_DNODE) { |
|
2180 |
dnode_phys_t *dnp = db->db.db_data; |
|
2181 |
for (i = db->db.db_size >> DNODE_SHIFT; i > 0; |
|
2182 |
i--, dnp++) { |
|
2183 |
if (dnp->dn_type != DMU_OT_NONE) |
|
2184 |
fill++; |
|
2185 |
} |
|
2186 |
} else { |
|
2187 |
if (!BP_IS_HOLE(db->db_blkptr)) |
|
2188 |
fill = 1; |
|
2189 |
} |
|
2190 |
} else { |
|
2191 |
blkptr_t *bp = db->db.db_data; |
|
2192 |
ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); |
|
2193 |
if (!BP_IS_HOLE(db->db_blkptr)) { |
|
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
2194 |
int epbs = |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
2195 |
dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; |
789 | 2196 |
ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, db->db.db_size); |
2197 |
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, |
|
2198 |
db->db.db_size); |
|
1163
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
2199 |
ASSERT3U(dn->dn_phys->dn_maxblkid |
4ba797920cc2
6343073 buf[i] == 0 assertion failure when running zvol_pstress
maybee
parents:
982
diff
changeset
|
2200 |
>> (db->db_level * epbs), >=, db->db_blkid); |
1544 | 2201 |
arc_set_callback(db->db_buf, dbuf_do_evict, db); |
789 | 2202 |
} |
2203 |
for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, bp++) { |
|
2204 |
if (BP_IS_HOLE(bp)) |
|
2205 |
continue; |
|
2206 |
ASSERT3U(BP_GET_LSIZE(bp), ==, |
|
2207 |
db->db_level == 1 ? dn->dn_datablksz : |
|
2208 |
(1<<dn->dn_phys->dn_indblkshift)); |
|
2209 |
fill += bp->blk_fill; |
|
2210 |
} |
|
2211 |
} |
|
2212 |
||
2213 |
if (!BP_IS_HOLE(db->db_blkptr)) { |
|
2214 |
db->db_blkptr->blk_fill = fill; |
|
2215 |
BP_SET_TYPE(db->db_blkptr, dn->dn_type); |
|
2216 |
BP_SET_LEVEL(db->db_blkptr, db->db_level); |
|
2217 |
} else { |
|
2218 |
ASSERT3U(fill, ==, 0); |
|
2219 |
ASSERT3U(db->db_blkptr->blk_fill, ==, 0); |
|
2220 |
} |
|
2221 |
||
2222 |
dprintf_dbuf_bp(db, db->db_blkptr, |
|
2223 |
"wrote %llu bytes to blkptr:", zio->io_size); |
|
2224 |
||
2225 |
ASSERT(db->db_parent == NULL || |
|
2226 |
list_link_active(&db->db_parent->db_dirty_node[txg&TXG_MASK])); |
|
2227 |
cv_broadcast(&db->db_changed); |
|
2228 |
ASSERT(db->db_dirtycnt > 0); |
|
2229 |
db->db_dirtycnt -= 1; |
|
2230 |
mutex_exit(&db->db_mtx); |
|
2231 |
||
2232 |
/* We must do this after we've set the bp's type and level */ |
|
2233 |
if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), |
|
2234 |
BP_IDENTITY(&zio->io_bp_orig))) { |
|
2235 |
struct dbuf_arg *da; |
|
2236 |
da = kmem_alloc(sizeof (struct dbuf_arg), KM_SLEEP); |
|
2237 |
da->os = os; |
|
2238 |
da->bp = *zio->io_bp; |
|
2239 |
(void) taskq_dispatch(dbuf_tq, dbuf_do_born, da, 0); |
|
2240 |
if (!BP_IS_HOLE(&zio->io_bp_orig)) { |
|
2241 |
da = kmem_alloc(sizeof (struct dbuf_arg), KM_SLEEP); |
|
2242 |
da->os = os; |
|
2243 |
da->bp = zio->io_bp_orig; |
|
2244 |
(void) taskq_dispatch(dbuf_tq, dbuf_do_kill, da, 0); |
|
2245 |
} |
|
2246 |
} |
|
2247 |
||
1544 | 2248 |
dbuf_rele(db, (void *)(uintptr_t)txg); |
789 | 2249 |
} |