usr/src/uts/common/fs/zfs/dmu_traverse.c
changeset 13700 2889e2596bd6
parent 13055 8c712bbb18ea
child 13764 38b4aca480b3
equal deleted inserted replaced
13699:733714f4dc24 13700:2889e2596bd6
    18  *
    18  *
    19  * CDDL HEADER END
    19  * CDDL HEADER END
    20  */
    20  */
    21 /*
    21 /*
    22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
    22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       
    23  * Copyright (c) 2012 by Delphix. All rights reserved.
    23  */
    24  */
    24 
    25 
    25 #include <sys/zfs_context.h>
    26 #include <sys/zfs_context.h>
    26 #include <sys/dmu_objset.h>
    27 #include <sys/dmu_objset.h>
    27 #include <sys/dmu_traverse.h>
    28 #include <sys/dmu_traverse.h>
    51 typedef struct traverse_data {
    52 typedef struct traverse_data {
    52 	spa_t *td_spa;
    53 	spa_t *td_spa;
    53 	uint64_t td_objset;
    54 	uint64_t td_objset;
    54 	blkptr_t *td_rootbp;
    55 	blkptr_t *td_rootbp;
    55 	uint64_t td_min_txg;
    56 	uint64_t td_min_txg;
       
    57 	zbookmark_t *td_resume;
    56 	int td_flags;
    58 	int td_flags;
    57 	prefetch_data_t *td_pfd;
    59 	prefetch_data_t *td_pfd;
    58 	blkptr_cb_t *td_func;
    60 	blkptr_cb_t *td_func;
    59 	void *td_arg;
    61 	void *td_arg;
    60 } traverse_data_t;
    62 } traverse_data_t;
   126 	    claim_txg);
   128 	    claim_txg);
   127 
   129 
   128 	zil_free(zilog);
   130 	zil_free(zilog);
   129 }
   131 }
   130 
   132 
       
   133 typedef enum resume_skip {
       
   134 	RESUME_SKIP_ALL,
       
   135 	RESUME_SKIP_NONE,
       
   136 	RESUME_SKIP_CHILDREN
       
   137 } resume_skip_t;
       
   138 
       
   139 /*
       
   140  * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
       
   141  * the block indicated by zb does not need to be visited at all. Returns
       
   142  * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
       
   143  * resume point. This indicates that this block should be visited but not its
       
   144  * children (since they must have been visited in a previous traversal).
       
   145  * Otherwise returns RESUME_SKIP_NONE.
       
   146  */
       
   147 static resume_skip_t
       
   148 resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
       
   149     const zbookmark_t *zb)
       
   150 {
       
   151 	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
       
   152 		/*
       
   153 		 * If we already visited this bp & everything below,
       
   154 		 * don't bother doing it again.
       
   155 		 */
       
   156 		if (zbookmark_is_before(dnp, zb, td->td_resume))
       
   157 			return (RESUME_SKIP_ALL);
       
   158 
       
   159 		/*
       
   160 		 * If we found the block we're trying to resume from, zero
       
   161 		 * the bookmark out to indicate that we have resumed.
       
   162 		 */
       
   163 		ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
       
   164 		if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
       
   165 			bzero(td->td_resume, sizeof (*zb));
       
   166 			if (td->td_flags & TRAVERSE_POST)
       
   167 				return (RESUME_SKIP_CHILDREN);
       
   168 		}
       
   169 	}
       
   170 	return (RESUME_SKIP_NONE);
       
   171 }
       
   172 
       
   173 static void
       
   174 traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
       
   175 {
       
   176 	ASSERT(td->td_resume != NULL);
       
   177 	ASSERT3U(zb->zb_level, ==, 0);
       
   178 	bcopy(zb, td->td_resume, sizeof (*td->td_resume));
       
   179 }
       
   180 
   131 static int
   181 static int
   132 traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
   182 traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
   133     arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
   183     arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
   134 {
   184 {
   135 	zbookmark_t czb;
   185 	zbookmark_t czb;
   136 	int err = 0, lasterr = 0;
   186 	int err = 0, lasterr = 0;
   137 	arc_buf_t *buf = NULL;
   187 	arc_buf_t *buf = NULL;
   138 	prefetch_data_t *pd = td->td_pfd;
   188 	prefetch_data_t *pd = td->td_pfd;
   139 	boolean_t hard = td->td_flags & TRAVERSE_HARD;
   189 	boolean_t hard = td->td_flags & TRAVERSE_HARD;
   140 
   190 	boolean_t pause = B_FALSE;
   141 	if (bp->blk_birth == 0) {
   191 
       
   192 	switch (resume_skip_check(td, dnp, zb)) {
       
   193 	case RESUME_SKIP_ALL:
       
   194 		return (0);
       
   195 	case RESUME_SKIP_CHILDREN:
       
   196 		goto post;
       
   197 	case RESUME_SKIP_NONE:
       
   198 		break;
       
   199 	default:
       
   200 		ASSERT(0);
       
   201 	}
       
   202 
       
   203 	if (BP_IS_HOLE(bp)) {
   142 		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
   204 		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
   143 		    td->td_arg);
   205 		    td->td_arg);
   144 		return (err);
   206 		return (err);
   145 	}
   207 	}
   146 
   208 
   162 	if (td->td_flags & TRAVERSE_PRE) {
   224 	if (td->td_flags & TRAVERSE_PRE) {
   163 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
   225 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
   164 		    td->td_arg);
   226 		    td->td_arg);
   165 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
   227 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
   166 			return (0);
   228 			return (0);
   167 		if (err)
   229 		if (err == ERESTART)
   168 			return (err);
   230 			pause = B_TRUE; /* handle pausing at a common point */
       
   231 		if (err != 0)
       
   232 			goto post;
   169 	}
   233 	}
   170 
   234 
   171 	if (BP_GET_LEVEL(bp) > 0) {
   235 	if (BP_GET_LEVEL(bp) > 0) {
   172 		uint32_t flags = ARC_WAIT;
   236 		uint32_t flags = ARC_WAIT;
   173 		int i;
   237 		int i;
   251 	}
   315 	}
   252 
   316 
   253 	if (buf)
   317 	if (buf)
   254 		(void) arc_buf_remove_ref(buf, &buf);
   318 		(void) arc_buf_remove_ref(buf, &buf);
   255 
   319 
       
   320 post:
   256 	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
   321 	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
   257 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
   322 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
   258 		    td->td_arg);
   323 		    td->td_arg);
       
   324 		if (err == ERESTART)
       
   325 			pause = B_TRUE;
       
   326 	}
       
   327 
       
   328 	if (pause && td->td_resume != NULL) {
       
   329 		ASSERT3U(err, ==, ERESTART);
       
   330 		ASSERT(!hard);
       
   331 		traverse_pause(td, zb);
   259 	}
   332 	}
   260 
   333 
   261 	return (err != 0 ? err : lasterr);
   334 	return (err != 0 ? err : lasterr);
   262 }
   335 }
   263 
   336 
   351 /*
   424 /*
   352  * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
   425  * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
   353  * in syncing context).
   426  * in syncing context).
   354  */
   427  */
   355 static int
   428 static int
   356 traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
   429 traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
   357     uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
   430     uint64_t txg_start, zbookmark_t *resume, int flags,
       
   431     blkptr_cb_t func, void *arg)
   358 {
   432 {
   359 	traverse_data_t td;
   433 	traverse_data_t td;
   360 	prefetch_data_t pd = { 0 };
   434 	prefetch_data_t pd = { 0 };
   361 	zbookmark_t czb;
   435 	zbookmark_t czb;
   362 	int err;
   436 	int err;
   363 
   437 
       
   438 	ASSERT(ds == NULL || objset == ds->ds_object);
       
   439 	ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
       
   440 
   364 	td.td_spa = spa;
   441 	td.td_spa = spa;
   365 	td.td_objset = ds ? ds->ds_object : 0;
   442 	td.td_objset = objset;
   366 	td.td_rootbp = rootbp;
   443 	td.td_rootbp = rootbp;
   367 	td.td_min_txg = txg_start;
   444 	td.td_min_txg = txg_start;
       
   445 	td.td_resume = resume;
   368 	td.td_func = func;
   446 	td.td_func = func;
   369 	td.td_arg = arg;
   447 	td.td_arg = arg;
   370 	td.td_pfd = &pd;
   448 	td.td_pfd = &pd;
   371 	td.td_flags = flags;
   449 	td.td_flags = flags;
   372 
   450 
   414  */
   492  */
   415 int
   493 int
   416 traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
   494 traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
   417     blkptr_cb_t func, void *arg)
   495     blkptr_cb_t func, void *arg)
   418 {
   496 {
   419 	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
   497 	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
   420 	    &ds->ds_phys->ds_bp, txg_start, flags, func, arg));
   498 	    &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
       
   499 }
       
   500 
       
   501 int
       
   502 traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
       
   503     uint64_t txg_start, zbookmark_t *resume, int flags,
       
   504     blkptr_cb_t func, void *arg)
       
   505 {
       
   506 	return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
       
   507 	    blkptr, txg_start, resume, flags, func, arg));
   421 }
   508 }
   422 
   509 
   423 /*
   510 /*
   424  * NB: pool must not be changing on-disk (eg, from zdb or sync context).
   511  * NB: pool must not be changing on-disk (eg, from zdb or sync context).
   425  */
   512  */
   432 	dsl_pool_t *dp = spa_get_dsl(spa);
   519 	dsl_pool_t *dp = spa_get_dsl(spa);
   433 	objset_t *mos = dp->dp_meta_objset;
   520 	objset_t *mos = dp->dp_meta_objset;
   434 	boolean_t hard = (flags & TRAVERSE_HARD);
   521 	boolean_t hard = (flags & TRAVERSE_HARD);
   435 
   522 
   436 	/* visit the MOS */
   523 	/* visit the MOS */
   437 	err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
   524 	err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
   438 	    txg_start, flags, func, arg);
   525 	    txg_start, NULL, flags, func, arg);
   439 	if (err)
   526 	if (err)
   440 		return (err);
   527 		return (err);
   441 
   528 
   442 	/* visit each dataset */
   529 	/* visit each dataset */
   443 	for (obj = 1; err == 0 || (err != ESRCH && hard);
   530 	for (obj = 1; err == 0 || (err != ESRCH && hard);