usr/src/uts/common/fs/zfs/spa.c
changeset 11727 497d03ab2824
parent 11619 d3fb7ae6fe56
child 11810 0a50367f8035
--- a/usr/src/uts/common/fs/zfs/spa.c	Sat Feb 20 06:04:42 2010 +0300
+++ b/usr/src/uts/common/fs/zfs/spa.c	Sat Feb 20 06:04:43 2010 +0300
@@ -1405,7 +1405,7 @@
 }
 
 typedef struct spa_load_error {
-	uint64_t	sle_metadata_count;
+	uint64_t	sle_meta_count;	/* errors on metadata blocks */
 	uint64_t	sle_data_count;
 } spa_load_error_t;
 
@@ -1420,7 +1420,7 @@
 	if (error) {
 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
 		    type != DMU_OT_INTENT_LOG)
-			atomic_add_64(&sle->sle_metadata_count, 1);
+			atomic_add_64(&sle->sle_meta_count, 1);
 		else
 			atomic_add_64(&sle->sle_data_count, 1);
 	}
@@ -1454,6 +1454,11 @@
 	boolean_t verify_ok = B_FALSE;
 	int error;
 
+	zpool_get_rewind_policy(spa->spa_config, &policy);
+
+	if (policy.zrp_request & ZPOOL_NEVER_REWIND)
+		return (0);
+
 	rio = zio_root(spa, NULL, &sle,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
 
@@ -1462,12 +1467,10 @@
 
 	(void) zio_wait(rio);
 
-	zpool_get_rewind_policy(spa->spa_config, &policy);
-
-	spa->spa_load_meta_errors = sle.sle_metadata_count;
+	spa->spa_load_meta_errors = sle.sle_meta_count;
 	spa->spa_load_data_errors = sle.sle_data_count;
 
-	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
+	if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
 	    sle.sle_data_count <= policy.zrp_maxdata) {
 		verify_ok = B_TRUE;
 		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
@@ -1774,7 +1777,7 @@
 	spa->spa_state = POOL_STATE_ACTIVE;
 	spa->spa_ubsync = spa->spa_uberblock;
 	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
-	    TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE;
+	    TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
 	spa->spa_claim_max_txg = spa->spa_first_txg;
@@ -1792,6 +1795,7 @@
 
 	if (!mosconfig) {
 		uint64_t hostid;
+		nvlist_t *policy = NULL;
 
 		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
@@ -1821,6 +1825,10 @@
 				return (EBADF);
 			}
 		}
+		if (nvlist_lookup_nvlist(spa->spa_config,
+		    ZPOOL_REWIND_POLICY, &policy) == 0)
+			VERIFY(nvlist_add_nvlist(nvconfig,
+			    ZPOOL_REWIND_POLICY, policy) == 0);
 
 		spa_config_set(spa, nvconfig);
 		spa_unload(spa);
@@ -2097,11 +2105,11 @@
 
 static int
 spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
-    uint64_t max_request, boolean_t extreme)
+    uint64_t max_request, int rewind_flags)
 {
 	nvlist_t *config = NULL;
 	int load_error, rewind_error;
-	uint64_t safe_rollback_txg;
+	uint64_t safe_rewind_txg;
 	uint64_t min_txg;
 
 	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
@@ -2122,8 +2130,7 @@
 	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
 	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;
 
-	/* specific txg requested */
-	if (spa->spa_load_max_txg != UINT64_MAX && !extreme) {
+	if (rewind_flags & ZPOOL_NEVER_REWIND) {
 		nvlist_free(config);
 		return (load_error);
 	}
@@ -2132,12 +2139,18 @@
 	if (state == SPA_LOAD_RECOVER)
 		spa_set_log_state(spa, SPA_LOG_CLEAR);
 
-	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
-	safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;
-
-	min_txg = extreme ? TXG_INITIAL : safe_rollback_txg;
-	while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) {
-		if (spa->spa_load_max_txg < safe_rollback_txg)
+	spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
+	safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
+	min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ?
+	    TXG_INITIAL : safe_rewind_txg;
+
+	/*
+	 * Continue as long as we're finding errors, we're still within
+	 * the acceptable rewind range, and we're still finding uberblocks
+	 */
+	while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg &&
+	    spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) {
+		if (spa->spa_load_max_txg < safe_rewind_txg)
 			spa->spa_extreme_rewind = B_TRUE;
 		rewind_error = spa_load_retry(spa, state, mosconfig);
 	}
@@ -2171,8 +2184,6 @@
     nvlist_t **config)
 {
 	spa_t *spa;
-	boolean_t norewind;
-	boolean_t extreme;
 	zpool_rewind_policy_t policy;
 	spa_load_state_t state = SPA_LOAD_OPEN;
 	int error;
@@ -2180,12 +2191,6 @@
 
 	*spapp = NULL;
 
-	zpool_get_rewind_policy(nvpolicy, &policy);
-	if (policy.zrp_request & ZPOOL_DO_REWIND)
-		state = SPA_LOAD_RECOVER;
-	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
-	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);
-
 	/*
 	 * As disgusting as this is, we need to support recursive calls to this
 	 * function because dsl_dir_open() is called during spa_load(), and ends
@@ -2203,11 +2208,16 @@
 		return (ENOENT);
 	}
 
+	zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, &policy);
+	if (policy.zrp_request & ZPOOL_DO_REWIND)
+		state = SPA_LOAD_RECOVER;
+
 	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
 
 		spa_activate(spa, spa_mode_global);
 
-		if (spa->spa_last_open_failed && norewind) {
+		if (spa->spa_last_open_failed && (policy.zrp_request &
+		    (ZPOOL_NO_REWIND | ZPOOL_NEVER_REWIND))) {
 			if (config != NULL && spa->spa_config)
 				VERIFY(nvlist_dup(spa->spa_config,
 				    config, KM_SLEEP) == 0);
@@ -2221,7 +2231,7 @@
 			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
 
 		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
-		    extreme);
+		    policy.zrp_request);
 
 		if (error == EBADF) {
 			/*
@@ -3083,7 +3093,6 @@
 spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
 {
 	spa_t *spa;
-	zpool_rewind_policy_t policy;
 	char *altroot = NULL;
 
 	mutex_enter(&spa_namespace_lock);
@@ -3096,9 +3105,6 @@
 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
 	spa = spa_add(pool, config, altroot);
 
-	zpool_get_rewind_policy(config, &policy);
-	spa->spa_load_max_txg = policy.zrp_txg;
-
 	spa->spa_load_verbatim = B_TRUE;
 
 	if (props != NULL)
@@ -3161,7 +3167,7 @@
 	if (state != SPA_LOAD_RECOVER)
 		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
 	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
-	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));
+	    policy.zrp_request);
 
 	/*
 	 * Propagate anything learned about failing or best txgs