125 avl_insert(t, zn, where); |
125 avl_insert(t, zn, where); |
126 |
126 |
127 return (0); |
127 return (0); |
128 } |
128 } |
129 |
129 |
|
130 static zil_header_t * |
|
131 zil_header_in_syncing_context(zilog_t *zilog) |
|
132 { |
|
133 return ((zil_header_t *)zilog->zl_header); |
|
134 } |
|
135 |
|
136 static void |
|
137 zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) |
|
138 { |
|
139 zio_cksum_t *zc = &bp->blk_cksum; |
|
140 |
|
141 zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); |
|
142 zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); |
|
143 zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); |
|
144 zc->zc_word[ZIL_ZC_SEQ] = 1ULL; |
|
145 } |
|
146 |
130 /* |
147 /* |
131 * Read a log block, make sure it's valid, and byteswap it if necessary. |
148 * Read a log block, make sure it's valid, and byteswap it if necessary. |
132 */ |
149 */ |
133 static int |
150 static int |
134 zil_read_log_block(zilog_t *zilog, blkptr_t *bp, char *buf) |
151 zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp) |
135 { |
152 { |
136 uint64_t blksz = BP_GET_LSIZE(bp); |
153 blkptr_t blk = *bp; |
137 zil_trailer_t *ztp = (zil_trailer_t *)(buf + blksz) - 1; |
|
138 zio_cksum_t cksum; |
|
139 zbookmark_t zb; |
154 zbookmark_t zb; |
140 int error; |
155 int error; |
141 |
156 |
142 zb.zb_objset = bp->blk_cksum.zc_word[2]; |
157 zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET]; |
143 zb.zb_object = 0; |
158 zb.zb_object = 0; |
144 zb.zb_level = -1; |
159 zb.zb_level = -1; |
145 zb.zb_blkid = bp->blk_cksum.zc_word[3]; |
160 zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ]; |
146 |
161 |
147 error = zio_wait(zio_read(NULL, zilog->zl_spa, bp, buf, blksz, |
162 *abufpp = NULL; |
148 NULL, NULL, ZIO_PRIORITY_SYNC_READ, |
163 |
149 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb)); |
164 error = arc_read(NULL, zilog->zl_spa, &blk, byteswap_uint64_array, |
150 if (error) { |
165 arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | |
151 dprintf_bp(bp, "zilog %p bp %p read failed, error %d: ", |
166 ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, ARC_WAIT, &zb); |
152 zilog, bp, error); |
167 |
153 return (error); |
168 if (error == 0) { |
154 } |
169 char *data = (*abufpp)->b_data; |
155 |
170 uint64_t blksz = BP_GET_LSIZE(bp); |
156 if (BP_SHOULD_BYTESWAP(bp)) |
171 zil_trailer_t *ztp = (zil_trailer_t *)(data + blksz) - 1; |
157 byteswap_uint64_array(buf, blksz); |
172 zio_cksum_t cksum = bp->blk_cksum; |
158 |
173 |
159 /* |
174 /* |
160 * Sequence numbers should be... sequential. The checksum verifier for |
175 * Sequence numbers should be... sequential. The checksum |
161 * the next block should be: <logid[0], logid[1], objset id, seq + 1>. |
176 * verifier for the next block should be bp's checksum plus 1. |
162 */ |
177 */ |
163 cksum = bp->blk_cksum; |
178 cksum.zc_word[ZIL_ZC_SEQ]++; |
164 cksum.zc_word[3]++; |
179 |
165 if (bcmp(&cksum, &ztp->zit_next_blk.blk_cksum, sizeof (cksum)) != 0) { |
180 if (bcmp(&cksum, &ztp->zit_next_blk.blk_cksum, sizeof (cksum))) |
166 dprintf_bp(bp, "zilog %p bp %p stale pointer: ", zilog, bp); |
181 error = ESTALE; |
167 return (ESTALE); |
182 else if (BP_IS_HOLE(&ztp->zit_next_blk)) |
168 } |
183 error = ENOENT; |
169 |
184 else if (ztp->zit_nused > (blksz - sizeof (zil_trailer_t))) |
170 if (BP_IS_HOLE(&ztp->zit_next_blk)) { |
185 error = EOVERFLOW; |
171 dprintf_bp(bp, "zilog %p bp %p hole: ", zilog, bp); |
186 |
172 return (ENOENT); |
187 if (error) { |
173 } |
188 VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1); |
174 |
189 *abufpp = NULL; |
175 if (ztp->zit_nused > (blksz - sizeof (zil_trailer_t))) { |
190 } |
176 dprintf("zilog %p bp %p nused exceeds blksz\n", zilog, bp); |
191 } |
177 return (EOVERFLOW); |
192 |
178 } |
193 dprintf("error %d on %llu:%llu\n", error, zb.zb_objset, zb.zb_blkid); |
179 |
194 |
180 dprintf_bp(bp, "zilog %p bp %p good block: ", zilog, bp); |
195 return (error); |
181 |
|
182 return (0); |
|
183 } |
196 } |
184 |
197 |
185 /* |
198 /* |
186 * Parse the intent log, and call parse_func for each valid record within. |
199 * Parse the intent log, and call parse_func for each valid record within. |
187 */ |
200 * Return the highest sequence number. |
188 void |
201 */ |
|
202 uint64_t |
189 zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, |
203 zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, |
190 zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) |
204 zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) |
191 { |
205 { |
192 blkptr_t blk; |
206 const zil_header_t *zh = zilog->zl_header; |
|
207 uint64_t claim_seq = zh->zh_claim_seq; |
|
208 uint64_t seq = 0; |
|
209 uint64_t max_seq = 0; |
|
210 blkptr_t blk = zh->zh_log; |
|
211 arc_buf_t *abuf; |
193 char *lrbuf, *lrp; |
212 char *lrbuf, *lrp; |
194 zil_trailer_t *ztp; |
213 zil_trailer_t *ztp; |
195 int reclen, error; |
214 int reclen, error; |
196 |
215 |
197 blk = zilog->zl_header->zh_log; |
|
198 if (BP_IS_HOLE(&blk)) |
216 if (BP_IS_HOLE(&blk)) |
199 return; |
217 return (max_seq); |
200 |
218 |
201 /* |
219 /* |
202 * Starting at the block pointed to by zh_log we read the log chain. |
220 * Starting at the block pointed to by zh_log we read the log chain. |
203 * For each block in the chain we strongly check that block to |
221 * For each block in the chain we strongly check that block to |
204 * ensure its validity. We stop when an invalid block is found. |
222 * ensure its validity. We stop when an invalid block is found. |
205 * For each block pointer in the chain we call parse_blk_func(). |
223 * For each block pointer in the chain we call parse_blk_func(). |
206 * For each record in each valid block we call parse_lr_func(). |
224 * For each record in each valid block we call parse_lr_func(). |
|
225 * If the log has been claimed, stop if we encounter a sequence |
|
226 * number greater than the highest claimed sequence number. |
207 */ |
227 */ |
208 zil_dva_tree_init(&zilog->zl_dva_tree); |
228 zil_dva_tree_init(&zilog->zl_dva_tree); |
209 lrbuf = zio_buf_alloc(SPA_MAXBLOCKSIZE); |
|
210 for (;;) { |
229 for (;;) { |
211 error = zil_read_log_block(zilog, &blk, lrbuf); |
230 seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; |
|
231 |
|
232 if (claim_seq != 0 && seq > claim_seq) |
|
233 break; |
|
234 |
|
235 ASSERT(max_seq < seq); |
|
236 max_seq = seq; |
|
237 |
|
238 error = zil_read_log_block(zilog, &blk, &abuf); |
212 |
239 |
213 if (parse_blk_func != NULL) |
240 if (parse_blk_func != NULL) |
214 parse_blk_func(zilog, &blk, arg, txg); |
241 parse_blk_func(zilog, &blk, arg, txg); |
215 |
242 |
216 if (error) |
243 if (error) |
217 break; |
244 break; |
218 |
245 |
|
246 lrbuf = abuf->b_data; |
219 ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; |
247 ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; |
220 blk = ztp->zit_next_blk; |
248 blk = ztp->zit_next_blk; |
221 |
249 |
222 if (parse_lr_func == NULL) |
250 if (parse_lr_func == NULL) { |
|
251 VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); |
223 continue; |
252 continue; |
|
253 } |
224 |
254 |
225 for (lrp = lrbuf; lrp < lrbuf + ztp->zit_nused; lrp += reclen) { |
255 for (lrp = lrbuf; lrp < lrbuf + ztp->zit_nused; lrp += reclen) { |
226 lr_t *lr = (lr_t *)lrp; |
256 lr_t *lr = (lr_t *)lrp; |
227 reclen = lr->lrc_reclen; |
257 reclen = lr->lrc_reclen; |
228 ASSERT3U(reclen, >=, sizeof (lr_t)); |
258 ASSERT3U(reclen, >=, sizeof (lr_t)); |
229 parse_lr_func(zilog, lr, arg, txg); |
259 parse_lr_func(zilog, lr, arg, txg); |
230 } |
260 } |
231 } |
261 VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); |
232 zio_buf_free(lrbuf, SPA_MAXBLOCKSIZE); |
262 } |
233 zil_dva_tree_fini(&zilog->zl_dva_tree); |
263 zil_dva_tree_fini(&zilog->zl_dva_tree); |
|
264 |
|
265 return (max_seq); |
234 } |
266 } |
235 |
267 |
236 /* ARGSUSED */ |
268 /* ARGSUSED */ |
237 static void |
269 static void |
238 zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) |
270 zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) |
239 { |
271 { |
240 spa_t *spa = zilog->zl_spa; |
272 spa_t *spa = zilog->zl_spa; |
241 int err; |
273 int err; |
242 |
|
243 dprintf_bp(bp, "first_txg %llu: ", first_txg); |
|
244 |
274 |
245 /* |
275 /* |
246 * Claim log block if not already committed and not already claimed. |
276 * Claim log block if not already committed and not already claimed. |
247 */ |
277 */ |
248 if (bp->blk_birth >= first_txg && |
278 if (bp->blk_birth >= first_txg && |
289 * Create an on-disk intent log. |
319 * Create an on-disk intent log. |
290 */ |
320 */ |
291 static void |
321 static void |
292 zil_create(zilog_t *zilog) |
322 zil_create(zilog_t *zilog) |
293 { |
323 { |
|
324 const zil_header_t *zh = zilog->zl_header; |
294 lwb_t *lwb; |
325 lwb_t *lwb; |
295 uint64_t txg; |
326 uint64_t txg = 0; |
296 dmu_tx_t *tx; |
327 dmu_tx_t *tx = NULL; |
297 blkptr_t blk; |
328 blkptr_t blk; |
298 int error; |
329 int error = 0; |
299 int no_blk; |
330 |
300 |
331 /* |
301 ASSERT(zilog->zl_header->zh_claim_txg == 0); |
332 * Wait for any previous destroy to complete. |
302 ASSERT(zilog->zl_header->zh_replay_seq == 0); |
333 */ |
303 |
334 txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); |
304 /* |
335 |
305 * Initialize the log header block. |
336 ASSERT(zh->zh_claim_txg == 0); |
306 */ |
337 ASSERT(zh->zh_replay_seq == 0); |
307 tx = dmu_tx_create(zilog->zl_os); |
338 |
308 (void) dmu_tx_assign(tx, TXG_WAIT); |
339 blk = zh->zh_log; |
309 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); |
340 |
310 txg = dmu_tx_get_txg(tx); |
341 /* |
311 |
342 * If we don't already have an initial log block, allocate one now. |
312 /* |
343 */ |
313 * If we don't have a log block already then |
344 if (BP_IS_HOLE(&blk)) { |
314 * allocate the first log block and assign its checksum verifier. |
345 tx = dmu_tx_create(zilog->zl_os); |
315 */ |
346 (void) dmu_tx_assign(tx, TXG_WAIT); |
316 no_blk = BP_IS_HOLE(&zilog->zl_header->zh_log); |
347 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); |
317 if (no_blk) { |
348 txg = dmu_tx_get_txg(tx); |
318 error = zio_alloc_blk(zilog->zl_spa, ZIO_CHECKSUM_ZILOG, |
349 |
319 ZIL_MIN_BLKSZ, &blk, txg); |
350 error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk, txg); |
320 } else { |
351 |
321 blk = zilog->zl_header->zh_log; |
352 if (error == 0) |
322 error = 0; |
353 zil_init_log_chain(zilog, &blk); |
323 } |
354 } |
|
355 |
|
356 /* |
|
357 * Allocate a log write buffer (lwb) for the first log block. |
|
358 */ |
324 if (error == 0) { |
359 if (error == 0) { |
325 ZIO_SET_CHECKSUM(&blk.blk_cksum, |
|
326 spa_get_random(-1ULL), spa_get_random(-1ULL), |
|
327 dmu_objset_id(zilog->zl_os), 1ULL); |
|
328 |
|
329 /* |
|
330 * Allocate a log write buffer (lwb) for the first log block. |
|
331 */ |
|
332 lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); |
360 lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); |
333 lwb->lwb_zilog = zilog; |
361 lwb->lwb_zilog = zilog; |
334 lwb->lwb_blk = blk; |
362 lwb->lwb_blk = blk; |
335 lwb->lwb_nused = 0; |
363 lwb->lwb_nused = 0; |
336 lwb->lwb_sz = BP_GET_LSIZE(&lwb->lwb_blk); |
364 lwb->lwb_sz = BP_GET_LSIZE(&lwb->lwb_blk); |
341 mutex_enter(&zilog->zl_lock); |
369 mutex_enter(&zilog->zl_lock); |
342 list_insert_tail(&zilog->zl_lwb_list, lwb); |
370 list_insert_tail(&zilog->zl_lwb_list, lwb); |
343 mutex_exit(&zilog->zl_lock); |
371 mutex_exit(&zilog->zl_lock); |
344 } |
372 } |
345 |
373 |
346 dmu_tx_commit(tx); |
374 /* |
347 if (no_blk) |
375 * If we just allocated the first log block, commit our transaction |
|
376 * and wait for zil_sync() to stuff the block pointer into zh_log. |
|
377 * (zh is part of the MOS, so we cannot modify it in open context.) |
|
378 */ |
|
379 if (tx != NULL) { |
|
380 dmu_tx_commit(tx); |
348 txg_wait_synced(zilog->zl_dmu_pool, txg); |
381 txg_wait_synced(zilog->zl_dmu_pool, txg); |
|
382 } |
|
383 |
|
384 ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); |
349 } |
385 } |
350 |
386 |
351 /* |
387 /* |
352 * In one tx, free all log blocks and clear the log header. |
388 * In one tx, free all log blocks and clear the log header. |
|
389 * If keep_first is set, then we're replaying a log with no content. |
|
390 * We want to keep the first block, however, so that the first |
|
391 * synchronous transaction doesn't require a txg_wait_synced() |
|
392 * in zil_create(). We don't need to txg_wait_synced() here either |
|
393 * when keep_first is set, because both zil_create() and zil_destroy() |
|
394 * will wait for any in-progress destroys to complete. |
353 */ |
395 */ |
354 void |
396 void |
355 zil_destroy(zilog_t *zilog) |
397 zil_destroy(zilog_t *zilog, boolean_t keep_first) |
356 { |
398 { |
|
399 const zil_header_t *zh = zilog->zl_header; |
|
400 lwb_t *lwb; |
357 dmu_tx_t *tx; |
401 dmu_tx_t *tx; |
358 uint64_t txg; |
402 uint64_t txg; |
359 |
403 |
360 mutex_enter(&zilog->zl_destroy_lock); |
404 /* |
361 |
405 * Wait for any previous destroy to complete. |
362 if (BP_IS_HOLE(&zilog->zl_header->zh_log)) { |
406 */ |
363 mutex_exit(&zilog->zl_destroy_lock); |
407 txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); |
|
408 |
|
409 if (BP_IS_HOLE(&zh->zh_log)) |
364 return; |
410 return; |
365 } |
|
366 |
411 |
367 tx = dmu_tx_create(zilog->zl_os); |
412 tx = dmu_tx_create(zilog->zl_os); |
368 (void) dmu_tx_assign(tx, TXG_WAIT); |
413 (void) dmu_tx_assign(tx, TXG_WAIT); |
369 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); |
414 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); |
370 txg = dmu_tx_get_txg(tx); |
415 txg = dmu_tx_get_txg(tx); |
371 |
416 |
372 zil_parse(zilog, zil_free_log_block, zil_free_log_record, tx, |
417 mutex_enter(&zilog->zl_lock); |
373 zilog->zl_header->zh_claim_txg); |
418 |
374 /* |
419 ASSERT3U(zilog->zl_destroy_txg, <, txg); |
375 * zil_sync clears the zil header as soon as the zl_destroy_txg commits |
|
376 */ |
|
377 zilog->zl_destroy_txg = txg; |
420 zilog->zl_destroy_txg = txg; |
|
421 zilog->zl_keep_first = keep_first; |
|
422 |
|
423 if (!list_is_empty(&zilog->zl_lwb_list)) { |
|
424 ASSERT(zh->zh_claim_txg == 0); |
|
425 ASSERT(!keep_first); |
|
426 while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { |
|
427 list_remove(&zilog->zl_lwb_list, lwb); |
|
428 if (lwb->lwb_buf != NULL) |
|
429 zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); |
|
430 zio_free_blk(zilog->zl_spa, &lwb->lwb_blk, txg); |
|
431 kmem_cache_free(zil_lwb_cache, lwb); |
|
432 } |
|
433 mutex_exit(&zilog->zl_lock); |
|
434 } else { |
|
435 mutex_exit(&zilog->zl_lock); |
|
436 if (!keep_first) { |
|
437 (void) zil_parse(zilog, zil_free_log_block, |
|
438 zil_free_log_record, tx, zh->zh_claim_txg); |
|
439 } |
|
440 } |
378 |
441 |
379 dmu_tx_commit(tx); |
442 dmu_tx_commit(tx); |
|
443 |
|
444 if (keep_first) /* no need to wait in this case */ |
|
445 return; |
|
446 |
380 txg_wait_synced(zilog->zl_dmu_pool, txg); |
447 txg_wait_synced(zilog->zl_dmu_pool, txg); |
381 |
448 ASSERT(BP_IS_HOLE(&zh->zh_log)); |
382 mutex_exit(&zilog->zl_destroy_lock); |
|
383 } |
449 } |
384 |
450 |
385 void |
451 void |
386 zil_claim(char *osname, void *txarg) |
452 zil_claim(char *osname, void *txarg) |
387 { |
453 { |
397 cmn_err(CE_WARN, "can't process intent log for %s", osname); |
463 cmn_err(CE_WARN, "can't process intent log for %s", osname); |
398 return; |
464 return; |
399 } |
465 } |
400 |
466 |
401 zilog = dmu_objset_zil(os); |
467 zilog = dmu_objset_zil(os); |
402 zh = zilog->zl_header; |
468 zh = zil_header_in_syncing_context(zilog); |
403 |
469 |
404 /* |
470 /* |
405 * Claim all log blocks if we haven't already done so. |
471 * Claim all log blocks if we haven't already done so, and remember |
|
472 * the highest claimed sequence number. This ensures that if we can |
|
473 * read only part of the log now (e.g. due to a missing device), |
|
474 * but we can read the entire log later, we will not try to replay |
|
475 * or destroy beyond the last block we successfully claimed. |
406 */ |
476 */ |
407 ASSERT3U(zh->zh_claim_txg, <=, first_txg); |
477 ASSERT3U(zh->zh_claim_txg, <=, first_txg); |
408 if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { |
478 if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { |
409 zh->zh_claim_txg = first_txg; |
479 zh->zh_claim_txg = first_txg; |
410 zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, |
480 zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block, |
411 tx, first_txg); |
481 zil_claim_log_record, tx, first_txg); |
412 dsl_dataset_dirty(dmu_objset_ds(os), tx); |
482 dsl_dataset_dirty(dmu_objset_ds(os), tx); |
413 } |
483 } |
|
484 |
414 ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); |
485 ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); |
415 dmu_objset_close(os); |
486 dmu_objset_close(os); |
416 } |
487 } |
417 |
488 |
418 void |
489 void |
947 * Called in syncing context to free committed log blocks and update log header. |
1017 * Called in syncing context to free committed log blocks and update log header. |
948 */ |
1018 */ |
949 void |
1019 void |
950 zil_sync(zilog_t *zilog, dmu_tx_t *tx) |
1020 zil_sync(zilog_t *zilog, dmu_tx_t *tx) |
951 { |
1021 { |
|
1022 zil_header_t *zh = zil_header_in_syncing_context(zilog); |
952 uint64_t txg = dmu_tx_get_txg(tx); |
1023 uint64_t txg = dmu_tx_get_txg(tx); |
953 spa_t *spa = zilog->zl_spa; |
1024 spa_t *spa = zilog->zl_spa; |
954 lwb_t *lwb; |
1025 lwb_t *lwb; |
955 |
1026 |
|
1027 mutex_enter(&zilog->zl_lock); |
|
1028 |
956 ASSERT(zilog->zl_stop_sync == 0); |
1029 ASSERT(zilog->zl_stop_sync == 0); |
957 |
1030 |
958 zilog->zl_header->zh_replay_seq = zilog->zl_replay_seq[txg & TXG_MASK]; |
1031 zh->zh_replay_seq = zilog->zl_replay_seq[txg & TXG_MASK]; |
959 |
1032 |
960 if (zilog->zl_destroy_txg == txg) { |
1033 if (zilog->zl_destroy_txg == txg) { |
961 bzero(zilog->zl_header, sizeof (zil_header_t)); |
1034 blkptr_t blk = zh->zh_log; |
|
1035 |
|
1036 ASSERT(list_head(&zilog->zl_lwb_list) == NULL); |
|
1037 ASSERT(spa_sync_pass(spa) == 1); |
|
1038 |
|
1039 bzero(zh, sizeof (zil_header_t)); |
962 bzero(zilog->zl_replay_seq, sizeof (zilog->zl_replay_seq)); |
1040 bzero(zilog->zl_replay_seq, sizeof (zilog->zl_replay_seq)); |
963 zilog->zl_destroy_txg = 0; |
1041 |
964 } |
1042 if (zilog->zl_keep_first) { |
965 |
1043 /* |
966 mutex_enter(&zilog->zl_lock); |
1044 * If this block was part of a log chain that couldn't |
|
1045 * be claimed because a device was missing during |
|
1046 * zil_claim(), but that device later returns, |
|
1047 * then this block could erroneously appear valid. |
|
1048 * To guard against this, assign a new GUID to the new |
|
1049 * log chain so it doesn't matter what blk points to. |
|
1050 */ |
|
1051 zil_init_log_chain(zilog, &blk); |
|
1052 zh->zh_log = blk; |
|
1053 } |
|
1054 } |
|
1055 |
967 for (;;) { |
1056 for (;;) { |
968 lwb = list_head(&zilog->zl_lwb_list); |
1057 lwb = list_head(&zilog->zl_lwb_list); |
969 if (lwb == NULL) { |
1058 if (lwb == NULL) { |
970 mutex_exit(&zilog->zl_lock); |
1059 mutex_exit(&zilog->zl_lock); |
971 return; |
1060 return; |
1103 * contains all the data it's supposed to, and has an empty intent log. |
1204 * contains all the data it's supposed to, and has an empty intent log. |
1104 */ |
1205 */ |
1105 int |
1206 int |
1106 zil_suspend(zilog_t *zilog) |
1207 zil_suspend(zilog_t *zilog) |
1107 { |
1208 { |
|
1209 const zil_header_t *zh = zilog->zl_header; |
1108 lwb_t *lwb; |
1210 lwb_t *lwb; |
1109 |
1211 |
1110 mutex_enter(&zilog->zl_lock); |
1212 mutex_enter(&zilog->zl_lock); |
1111 if (zilog->zl_header->zh_claim_txg != 0) { /* unplayed log */ |
1213 if (zh->zh_claim_txg != 0) { /* unplayed log */ |
1112 mutex_exit(&zilog->zl_lock); |
1214 mutex_exit(&zilog->zl_lock); |
1113 return (EBUSY); |
1215 return (EBUSY); |
1114 } |
1216 } |
1115 zilog->zl_suspend++; |
1217 if (zilog->zl_suspend++ != 0) { |
|
1218 /* |
|
1219 * Someone else already began a suspend. |
|
1220 * Just wait for them to finish. |
|
1221 */ |
|
1222 while (zilog->zl_suspending) |
|
1223 cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); |
|
1224 ASSERT(BP_IS_HOLE(&zh->zh_log)); |
|
1225 mutex_exit(&zilog->zl_lock); |
|
1226 return (0); |
|
1227 } |
|
1228 zilog->zl_suspending = B_TRUE; |
1116 mutex_exit(&zilog->zl_lock); |
1229 mutex_exit(&zilog->zl_lock); |
1117 |
1230 |
1118 zil_commit(zilog, UINT64_MAX, FSYNC); |
1231 zil_commit(zilog, UINT64_MAX, FSYNC); |
1119 |
1232 |
1120 mutex_enter(&zilog->zl_lock); |
1233 mutex_enter(&zilog->zl_lock); |
1121 while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { |
1234 for (;;) { |
1122 if (lwb->lwb_buf != NULL) { |
1235 /* |
1123 /* |
1236 * Wait for any in-flight log writes to complete. |
1124 * Wait for the buffer if it's in the process of |
1237 */ |
1125 * being written. |
1238 for (lwb = list_head(&zilog->zl_lwb_list); lwb != NULL; |
1126 */ |
1239 lwb = list_next(&zilog->zl_lwb_list, lwb)) |
1127 if ((lwb->lwb_seq != 0) && |
1240 if (lwb->lwb_seq != 0 && lwb->lwb_state != SEQ_COMPLETE) |
1128 (lwb->lwb_state != SEQ_COMPLETE)) { |
1241 break; |
1129 cv_wait(&zilog->zl_cv_seq, &zilog->zl_lock); |
1242 |
1130 continue; |
1243 if (lwb == NULL) |
1131 } |
1244 break; |
1132 zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); |
1245 |
1133 } |
1246 cv_wait(&zilog->zl_cv_seq, &zilog->zl_lock); |
1134 list_remove(&zilog->zl_lwb_list, lwb); |
1247 } |
1135 kmem_cache_free(zil_lwb_cache, lwb); |
1248 |
1136 } |
|
1137 mutex_exit(&zilog->zl_lock); |
1249 mutex_exit(&zilog->zl_lock); |
1138 |
1250 |
1139 zil_destroy(zilog); |
1251 zil_destroy(zilog, B_FALSE); |
|
1252 |
|
1253 mutex_enter(&zilog->zl_lock); |
|
1254 ASSERT(BP_IS_HOLE(&zh->zh_log)); |
|
1255 zilog->zl_suspending = B_FALSE; |
|
1256 cv_broadcast(&zilog->zl_cv_suspend); |
|
1257 mutex_exit(&zilog->zl_lock); |
1140 |
1258 |
1141 return (0); |
1259 return (0); |
1142 } |
1260 } |
1143 |
1261 |
1144 void |
1262 void |
1308 void |
1426 void |
1309 zil_replay(objset_t *os, void *arg, uint64_t *txgp, |
1427 zil_replay(objset_t *os, void *arg, uint64_t *txgp, |
1310 zil_replay_func_t *replay_func[TX_MAX_TYPE], void (*rm_sync)(void *arg)) |
1428 zil_replay_func_t *replay_func[TX_MAX_TYPE], void (*rm_sync)(void *arg)) |
1311 { |
1429 { |
1312 zilog_t *zilog = dmu_objset_zil(os); |
1430 zilog_t *zilog = dmu_objset_zil(os); |
1313 zil_replay_arg_t zr; |
1431 const zil_header_t *zh = zilog->zl_header; |
|
1432 zil_replay_arg_t zr; |
1314 |
1433 |
1315 if (zil_empty(zilog)) { |
1434 if (zil_empty(zilog)) { |
1316 /* |
1435 zil_destroy(zilog, B_TRUE); |
1317 * Initialise the log header but don't free the log block |
|
1318 * which will get reused. |
|
1319 */ |
|
1320 zilog->zl_header->zh_claim_txg = 0; |
|
1321 zilog->zl_header->zh_replay_seq = 0; |
|
1322 return; |
1436 return; |
1323 } |
1437 } |
1324 |
1438 |
1325 zr.zr_os = os; |
1439 zr.zr_os = os; |
1326 zr.zr_replay = replay_func; |
1440 zr.zr_replay = replay_func; |
1327 zr.zr_arg = arg; |
1441 zr.zr_arg = arg; |
1328 zr.zr_rm_sync = rm_sync; |
1442 zr.zr_rm_sync = rm_sync; |
1329 zr.zr_txgp = txgp; |
1443 zr.zr_txgp = txgp; |
1330 zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zilog->zl_header->zh_log); |
1444 zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); |
1331 zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); |
1445 zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); |
1332 |
1446 |
1333 /* |
1447 /* |
1334 * Wait for in-progress removes to sync before starting replay. |
1448 * Wait for in-progress removes to sync before starting replay. |
1335 */ |
1449 */ |
1336 if (rm_sync != NULL) |
1450 if (rm_sync != NULL) |
1337 rm_sync(arg); |
1451 rm_sync(arg); |
1338 txg_wait_synced(zilog->zl_dmu_pool, 0); |
1452 txg_wait_synced(zilog->zl_dmu_pool, 0); |
1339 |
1453 |
1340 zilog->zl_stop_replay = 0; |
1454 zilog->zl_stop_replay = 0; |
1341 zil_parse(zilog, NULL, zil_replay_log_record, &zr, |
1455 (void) zil_parse(zilog, NULL, zil_replay_log_record, &zr, |
1342 zilog->zl_header->zh_claim_txg); |
1456 zh->zh_claim_txg); |
1343 kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE); |
1457 kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE); |
1344 |
1458 |
1345 zil_destroy(zilog); |
1459 zil_destroy(zilog, B_FALSE); |
1346 } |
1460 } |
1347 |
1461 |
1348 /* |
1462 /* |
1349 * Report whether all transactions are committed |
1463 * Report whether all transactions are committed |
1350 */ |
1464 */ |
1351 int |
1465 int |
1352 zil_is_committed(zilog_t *zilog) |
1466 zil_is_committed(zilog_t *zilog) |
1353 { |
1467 { |
1354 lwb_t *lwb; |
1468 lwb_t *lwb; |
1355 |
1469 |
1356 if (zilog == NULL || list_head(&zilog->zl_itx_list)) |
1470 if (!list_is_empty(&zilog->zl_itx_list)) |
1357 return (B_FALSE); |
1471 return (B_FALSE); |
1358 |
1472 |
1359 /* |
1473 /* |
1360 * A log write buffer at the head of the list that is not UNWRITTEN |
1474 * A log write buffer at the head of the list that is not UNWRITTEN |
1361 * means there's a lwb yet to be freed after a txg commit |
1475 * means there's a lwb yet to be freed after a txg commit |