usr/src/uts/common/fs/zfs/metaslab.c
author Chris Kirby <Chris.Kirby@oracle.com>
Thu, 10 Jun 2010 15:46:47 -0600
changeset 12605 6790e683d5a5
parent 12047 7c1fcc8419ca
child 13379 4df42cc92254
permissions -rw-r--r--
6959846 DMU traverse prefetch size should be a global tunable
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     1
/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     2
 * CDDL HEADER START
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     3
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     4
 * The contents of this file are subject to the terms of the
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
     5
 * Common Development and Distribution License (the "License").
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
     6
 * You may not use this file except in compliance with the License.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     7
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     8
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
     9
 * or http://www.opensolaris.org/os/licensing.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    10
 * See the License for the specific language governing permissions
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    11
 * and limitations under the License.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    12
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    13
 * When distributing Covered Code, include this CDDL HEADER in each
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    14
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    15
 * If applicable, add the following below this CDDL HEADER, with the
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    16
 * fields enclosed by brackets "[]" replaced with your own identifying
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    17
 * information: Portions Copyright [yyyy] [name of copyright owner]
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    18
 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    19
 * CDDL HEADER END
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    20
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    21
/*
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
    22
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    23
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    24
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    25
#include <sys/zfs_context.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    26
#include <sys/dmu.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    27
#include <sys/dmu_tx.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    28
#include <sys/space_map.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    29
#include <sys/metaslab_impl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    30
#include <sys/vdev_impl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    31
#include <sys/zio.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    32
2391
2fa3fd1db808 6447377 ZFS prefetch is inconsistant
maybee
parents: 1807
diff changeset
    33
/*
 * Base allocation aliquot (512ULL << 10 = 524288).  metaslab_group_activate()
 * multiplies this by the vdev's child count (at least 1) to set mg_aliquot.
 * NOTE(review): presumably a byte count -- confirm against the allocator.
 */
uint64_t metaslab_aliquot = 512ULL << 10;
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
    34
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1;	/* force gang blocks */
2391
2fa3fd1db808 6447377 ZFS prefetch is inconsistant
maybee
parents: 1807
diff changeset
    35
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    36
/*
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
    37
 * Metaslab debugging: when set, keeps all space maps in core to verify frees.
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
    38
 */
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
    39
static int metaslab_debug = 0;
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
    40
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
    41
/*
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    42
 * Minimum size which forces the dynamic allocator to change
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    43
 * its allocation strategy.  Once the space map cannot satisfy
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    44
 * an allocation of this size then it switches to using a more
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    45
 * aggressive strategy (i.e., search by size rather than offset).
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    46
 */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    47
uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    48
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    49
/*
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    50
 * The minimum free space, in percent, which must be available
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    51
 * in a space map to continue allocations in a first-fit fashion.
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    52
 * Once the space_map's free space drops below this level we dynamically
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    53
 * switch to using best-fit allocations.
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    54
 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    55
int metaslab_df_free_pct = 4;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    56
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    57
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    58
 * A metaslab is considered "free" if it contains a contiguous
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    59
 * segment which is greater than metaslab_min_alloc_size.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    60
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    61
uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    62
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    63
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    64
 * Max number of space_maps to prefetch.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    65
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    66
int metaslab_prefetch_limit = SPA_DVAS_PER_BP;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    67
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    68
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    69
 * Percentage bonus multiplier for metaslabs that are in the bonus area.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    70
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
    71
int metaslab_smo_bonus_pct = 150;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    72
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    73
/*
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    74
 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    75
 * Metaslab classes
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    76
 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    77
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    78
metaslab_class_t *
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
    79
metaslab_class_create(spa_t *spa, space_map_ops_t *ops)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    80
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    81
	metaslab_class_t *mc;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    82
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    83
	mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    84
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
    85
	mc->mc_spa = spa;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    86
	mc->mc_rotor = NULL;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
    87
	mc->mc_ops = ops;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    88
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    89
	return (mc);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    90
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    91
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    92
void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    93
metaslab_class_destroy(metaslab_class_t *mc)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
    94
{
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
    95
	ASSERT(mc->mc_rotor == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
    96
	ASSERT(mc->mc_alloc == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
    97
	ASSERT(mc->mc_deferred == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
    98
	ASSERT(mc->mc_space == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
    99
	ASSERT(mc->mc_dspace == 0);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   100
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   101
	kmem_free(mc, sizeof (metaslab_class_t));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   102
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   103
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   104
int
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   105
metaslab_class_validate(metaslab_class_t *mc)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   106
{
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   107
	metaslab_group_t *mg;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   108
	vdev_t *vd;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   109
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   110
	/*
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   111
	 * Must hold one of the spa_config locks.
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   112
	 */
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   113
	ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) ||
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   114
	    spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER));
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   115
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   116
	if ((mg = mc->mc_rotor) == NULL)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   117
		return (0);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   118
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   119
	do {
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   120
		vd = mg->mg_vd;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   121
		ASSERT(vd->vdev_mg != NULL);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   122
		ASSERT3P(vd->vdev_top, ==, vd);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   123
		ASSERT3P(mg->mg_class, ==, mc);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   124
		ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   125
	} while ((mg = mg->mg_next) != mc->mc_rotor);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   126
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   127
	return (0);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   128
}
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   129
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   130
void
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   131
metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta,
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   132
    int64_t defer_delta, int64_t space_delta, int64_t dspace_delta)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   133
{
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   134
	atomic_add_64(&mc->mc_alloc, alloc_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   135
	atomic_add_64(&mc->mc_deferred, defer_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   136
	atomic_add_64(&mc->mc_space, space_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   137
	atomic_add_64(&mc->mc_dspace, dspace_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   138
}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   139
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   140
uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   141
metaslab_class_get_alloc(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   142
{
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   143
	return (mc->mc_alloc);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   144
}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   145
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   146
uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   147
metaslab_class_get_deferred(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   148
{
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   149
	return (mc->mc_deferred);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   150
}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   151
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   152
uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   153
metaslab_class_get_space(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   154
{
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   155
	return (mc->mc_space);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   156
}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   157
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   158
uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   159
metaslab_class_get_dspace(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   160
{
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   161
	return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   162
}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   163
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   164
/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   165
 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   166
 * Metaslab groups
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   167
 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   168
 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   169
static int
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   170
metaslab_compare(const void *x1, const void *x2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   171
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   172
	const metaslab_t *m1 = x1;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   173
	const metaslab_t *m2 = x2;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   174
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   175
	if (m1->ms_weight < m2->ms_weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   176
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   177
	if (m1->ms_weight > m2->ms_weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   178
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   179
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   180
	/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   181
	 * If the weights are identical, use the offset to force uniqueness.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   182
	 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   183
	if (m1->ms_map.sm_start < m2->ms_map.sm_start)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   184
		return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   185
	if (m1->ms_map.sm_start > m2->ms_map.sm_start)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   186
		return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   187
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   188
	ASSERT3P(m1, ==, m2);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   189
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   190
	return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   191
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   192
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   193
metaslab_group_t *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   194
metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   195
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   196
	metaslab_group_t *mg;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   197
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   198
	mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   199
	mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   200
	avl_create(&mg->mg_metaslab_tree, metaslab_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   201
	    sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   202
	mg->mg_vd = vd;
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   203
	mg->mg_class = mc;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   204
	mg->mg_activation_count = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   205
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   206
	return (mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   207
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   208
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   209
void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   210
metaslab_group_destroy(metaslab_group_t *mg)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   211
{
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   212
	ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   213
	ASSERT(mg->mg_next == NULL);
11026
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   214
	/*
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   215
	 * We may have gone below zero with the activation count
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   216
	 * either because we never activated in the first place or
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   217
	 * because we're done, and possibly removing the vdev.
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   218
	 */
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
   219
	ASSERT(mg->mg_activation_count <= 0);
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   220
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   221
	avl_destroy(&mg->mg_metaslab_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   222
	mutex_destroy(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   223
	kmem_free(mg, sizeof (metaslab_group_t));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   224
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   225
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   226
void
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   227
metaslab_group_activate(metaslab_group_t *mg)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   228
{
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   229
	metaslab_class_t *mc = mg->mg_class;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   230
	metaslab_group_t *mgprev, *mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   231
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   232
	ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER));
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   233
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   234
	ASSERT(mc->mc_rotor != mg);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   235
	ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   236
	ASSERT(mg->mg_next == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   237
	ASSERT(mg->mg_activation_count <= 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   238
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   239
	if (++mg->mg_activation_count <= 0)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   240
		return;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   241
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   242
	mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   243
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   244
	if ((mgprev = mc->mc_rotor) == NULL) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   245
		mg->mg_prev = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   246
		mg->mg_next = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   247
	} else {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   248
		mgnext = mgprev->mg_next;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   249
		mg->mg_prev = mgprev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   250
		mg->mg_next = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   251
		mgprev->mg_next = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   252
		mgnext->mg_prev = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   253
	}
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   254
	mc->mc_rotor = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   255
}
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   256
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   257
void
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   258
metaslab_group_passivate(metaslab_group_t *mg)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   259
{
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   260
	metaslab_class_t *mc = mg->mg_class;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   261
	metaslab_group_t *mgprev, *mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   262
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   263
	ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER));
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   264
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   265
	if (--mg->mg_activation_count != 0) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   266
		ASSERT(mc->mc_rotor != mg);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   267
		ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   268
		ASSERT(mg->mg_next == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   269
		ASSERT(mg->mg_activation_count < 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   270
		return;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   271
	}
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   272
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   273
	mgprev = mg->mg_prev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   274
	mgnext = mg->mg_next;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   275
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   276
	if (mg == mgnext) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   277
		mc->mc_rotor = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   278
	} else {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   279
		mc->mc_rotor = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   280
		mgprev->mg_next = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   281
		mgnext->mg_prev = mgprev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   282
	}
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   283
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   284
	mg->mg_prev = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   285
	mg->mg_next = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   286
}
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
   287
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   288
static void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   289
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   290
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   291
	mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   292
	ASSERT(msp->ms_group == NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   293
	msp->ms_group = mg;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   294
	msp->ms_weight = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   295
	avl_add(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   296
	mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   297
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   298
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   299
static void
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   300
metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   301
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   302
	mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   303
	ASSERT(msp->ms_group == mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   304
	avl_remove(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   305
	msp->ms_group = NULL;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   306
	mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   307
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   308
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   309
static void
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   310
metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   311
{
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   312
	/*
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   313
	 * Although in principle the weight can be any value, in
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   314
	 * practice we do not use values in the range [1, 510].
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   315
	 */
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   316
	ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   317
	ASSERT(MUTEX_HELD(&msp->ms_lock));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   318
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   319
	mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   320
	ASSERT(msp->ms_group == mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   321
	avl_remove(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   322
	msp->ms_weight = weight;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   323
	avl_add(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   324
	mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   325
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   326
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   327
/*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   328
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   329
 * Common allocator routines
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   330
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   331
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   332
static int
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   333
metaslab_segsize_compare(const void *x1, const void *x2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   334
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   335
	const space_seg_t *s1 = x1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   336
	const space_seg_t *s2 = x2;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   337
	uint64_t ss_size1 = s1->ss_end - s1->ss_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   338
	uint64_t ss_size2 = s2->ss_end - s2->ss_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   339
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   340
	if (ss_size1 < ss_size2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   341
		return (-1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   342
	if (ss_size1 > ss_size2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   343
		return (1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   344
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   345
	if (s1->ss_start < s2->ss_start)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   346
		return (-1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   347
	if (s1->ss_start > s2->ss_start)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   348
		return (1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   349
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   350
	return (0);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   351
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   352
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   353
/*
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   354
 * This is a helper function that can be used by the allocator to find
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   355
 * a suitable block to allocate. This will search the specified AVL
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   356
 * tree looking for a block that matches the specified criteria.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   357
 */
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   358
static uint64_t
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   359
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   360
    uint64_t align)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   361
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   362
	space_seg_t *ss, ssearch;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   363
	avl_index_t where;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   364
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   365
	ssearch.ss_start = *cursor;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   366
	ssearch.ss_end = *cursor + size;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   367
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   368
	ss = avl_find(t, &ssearch, &where);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   369
	if (ss == NULL)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   370
		ss = avl_nearest(t, where, AVL_AFTER);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   371
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   372
	while (ss != NULL) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   373
		uint64_t offset = P2ROUNDUP(ss->ss_start, align);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   374
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   375
		if (offset + size <= ss->ss_end) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   376
			*cursor = offset + size;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   377
			return (offset);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   378
		}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   379
		ss = AVL_NEXT(t, ss);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   380
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   381
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   382
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   383
	 * If we know we've searched the whole map (*cursor == 0), give up.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   384
	 * Otherwise, reset the cursor to the beginning and try again.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   385
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   386
	if (*cursor == 0)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   387
		return (-1ULL);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   388
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   389
	*cursor = 0;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   390
	return (metaslab_block_picker(t, cursor, size, align));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   391
}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   392
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   393
static void
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   394
metaslab_pp_load(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   395
{
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   396
	space_seg_t *ss;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   397
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   398
	ASSERT(sm->sm_ppd == NULL);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   399
	sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   400
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   401
	sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   402
	avl_create(sm->sm_pp_root, metaslab_segsize_compare,
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   403
	    sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   404
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   405
	for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   406
		avl_add(sm->sm_pp_root, ss);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   407
}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   408
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   409
static void
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   410
metaslab_pp_unload(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   411
{
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   412
	void *cookie = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   413
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   414
	kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   415
	sm->sm_ppd = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   416
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   417
	while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   418
		/* tear down the tree */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   419
	}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   420
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   421
	avl_destroy(sm->sm_pp_root);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   422
	kmem_free(sm->sm_pp_root, sizeof (avl_tree_t));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   423
	sm->sm_pp_root = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   424
}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   425
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   426
/* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   427
static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   428
metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   429
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   430
	/* No need to update cursor */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   431
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   432
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   433
/* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   434
static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   435
metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   436
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   437
	/* No need to update cursor */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   438
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   439
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   440
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   441
 * Return the maximum contiguous segment within the metaslab.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   442
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   443
uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   444
metaslab_pp_maxsize(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   445
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   446
	avl_tree_t *t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   447
	space_seg_t *ss;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   448
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   449
	if (t == NULL || (ss = avl_last(t)) == NULL)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   450
		return (0ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   451
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   452
	return (ss->ss_end - ss->ss_start);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   453
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   454
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   455
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   456
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   457
 * The first-fit block allocator
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   458
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   459
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   460
static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   461
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   462
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   463
	avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   464
	uint64_t align = size & -size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   465
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   466
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   467
	return (metaslab_block_picker(t, cursor, size, align));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   468
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   469
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   470
/* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   471
boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   472
metaslab_ff_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   473
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   474
	return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   475
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   476
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   477
static space_map_ops_t metaslab_ff_ops = {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   478
	metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   479
	metaslab_pp_unload,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   480
	metaslab_ff_alloc,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   481
	metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   482
	metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   483
	metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   484
	metaslab_ff_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   485
};
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   486
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   487
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   488
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   489
 * Dynamic block allocator -
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   490
 * Uses the first fit allocation scheme until space gets low and then
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   491
 * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   492
 * and metaslab_df_free_pct to determine when to switch the allocation scheme.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   493
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   494
 */
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   495
static uint64_t
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   496
metaslab_df_alloc(space_map_t *sm, uint64_t size)
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   497
{
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   498
	avl_tree_t *t = &sm->sm_root;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   499
	uint64_t align = size & -size;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   500
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   501
	uint64_t max_size = metaslab_pp_maxsize(sm);
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   502
	int free_pct = sm->sm_space * 100 / sm->sm_size;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   503
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   504
	ASSERT(MUTEX_HELD(sm->sm_lock));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   505
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   506
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   507
	if (max_size < size)
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   508
		return (-1ULL);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   509
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   510
	/*
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   511
	 * If we're running low on space switch to using the size
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   512
	 * sorted AVL tree (best-fit).
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   513
	 */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   514
	if (max_size < metaslab_df_alloc_threshold ||
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   515
	    free_pct < metaslab_df_free_pct) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   516
		t = sm->sm_pp_root;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   517
		*cursor = 0;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   518
	}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   519
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   520
	return (metaslab_block_picker(t, cursor, size, 1ULL));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   521
}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   522
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   523
static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   524
metaslab_df_fragmented(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   525
{
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   526
	uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   527
	int free_pct = sm->sm_space * 100 / sm->sm_size;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   528
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   529
	if (max_size >= metaslab_df_alloc_threshold &&
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   530
	    free_pct >= metaslab_df_free_pct)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   531
		return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   532
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   533
	return (B_TRUE);
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   534
}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   535
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   536
static space_map_ops_t metaslab_df_ops = {
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   537
	metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   538
	metaslab_pp_unload,
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   539
	metaslab_df_alloc,
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   540
	metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   541
	metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   542
	metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   543
	metaslab_df_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   544
};
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   545
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   546
/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   547
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   548
 * Other experimental allocators
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   549
 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   550
 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   551
static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   552
metaslab_cdf_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   553
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   554
	avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   555
	uint64_t *cursor = (uint64_t *)sm->sm_ppd;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   556
	uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   557
	uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   558
	uint64_t rsize = size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   559
	uint64_t offset = 0;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   560
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   561
	ASSERT(MUTEX_HELD(sm->sm_lock));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   562
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   563
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   564
	if (max_size < size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   565
		return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   566
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   567
	ASSERT3U(*extent_end, >=, *cursor);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   568
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   569
	/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   570
	 * If we're running low on space switch to using the size
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   571
	 * sorted AVL tree (best-fit).
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   572
	 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   573
	if ((*cursor + size) > *extent_end) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   574
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   575
		t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   576
		*cursor = *extent_end = 0;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   577
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   578
		if (max_size > 2 * SPA_MAXBLOCKSIZE)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   579
			rsize = MIN(metaslab_min_alloc_size, max_size);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   580
		offset = metaslab_block_picker(t, extent_end, rsize, 1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   581
		if (offset != -1)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   582
			*cursor = offset + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   583
	} else {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   584
		offset = metaslab_block_picker(t, cursor, rsize, 1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   585
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   586
	ASSERT3U(*cursor, <=, *extent_end);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   587
	return (offset);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   588
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   589
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   590
static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   591
metaslab_cdf_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   592
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   593
	uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   594
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   595
	if (max_size > (metaslab_min_alloc_size * 10))
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   596
		return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   597
	return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   598
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   599
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   600
static space_map_ops_t metaslab_cdf_ops = {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   601
	metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   602
	metaslab_pp_unload,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   603
	metaslab_cdf_alloc,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   604
	metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   605
	metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   606
	metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   607
	metaslab_cdf_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   608
};
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   609
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   610
/*
 * Shift used by the "ndf" allocator.  metaslab_ndf_alloc() adds it to
 * highbit(size) when bounding the segment size it searches for in
 * sm_pp_root, and metaslab_ndf_fragmented() shifts
 * metaslab_min_alloc_size left by it to form its fragmentation limit.
 */
uint64_t metaslab_ndf_clump_shift = 4;
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   611
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   612
static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   613
metaslab_ndf_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   614
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   615
	avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   616
	avl_index_t where;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   617
	space_seg_t *ss, ssearch;
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   618
	uint64_t hbit = highbit(size);
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   619
	uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   620
	uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   621
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   622
	ASSERT(MUTEX_HELD(sm->sm_lock));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   623
	ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   624
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   625
	if (max_size < size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   626
		return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   627
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   628
	ssearch.ss_start = *cursor;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   629
	ssearch.ss_end = *cursor + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   630
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   631
	ss = avl_find(t, &ssearch, &where);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   632
	if (ss == NULL || (ss->ss_start + size > ss->ss_end)) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   633
		t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   634
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   635
		ssearch.ss_start = 0;
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   636
		ssearch.ss_end = MIN(max_size,
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   637
		    1ULL << (hbit + metaslab_ndf_clump_shift));
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   638
		ss = avl_find(t, &ssearch, &where);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   639
		if (ss == NULL)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   640
			ss = avl_nearest(t, where, AVL_AFTER);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   641
		ASSERT(ss != NULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   642
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   643
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   644
	if (ss != NULL) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   645
		if (ss->ss_start + size <= ss->ss_end) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   646
			*cursor = ss->ss_start + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   647
			return (ss->ss_start);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   648
		}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   649
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   650
	return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   651
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   652
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   653
static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   654
metaslab_ndf_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   655
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   656
	uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   657
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   658
	if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift))
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   659
		return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   660
	return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   661
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   662
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   663
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   664
static space_map_ops_t metaslab_ndf_ops = {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   665
	metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   666
	metaslab_pp_unload,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   667
	metaslab_ndf_alloc,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   668
	metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   669
	metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   670
	metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   671
	metaslab_ndf_fragmented
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   672
};
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   673
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
   674
/*
 * Global pointer to a space map ops vector, initialized to the ndf table.
 * It has external linkage, so it is presumably read outside this file when
 * an allocator is selected -- TODO confirm against the callers.
 */
space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   675
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   676
/*
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   677
 * ==========================================================================
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   678
 * Metaslabs
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   679
 * ==========================================================================
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   680
 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   681
metaslab_t *
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   682
metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   683
	uint64_t start, uint64_t size, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   684
{
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   685
	vdev_t *vd = mg->mg_vd;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   686
	metaslab_t *msp;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   687
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   688
	msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP);
2856
6f4d5ee1906a 6463348 ZFS code could be more portable
nd150628
parents: 2459
diff changeset
   689
	mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   690
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   691
	msp->ms_smo_syncing = *smo;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   692
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   693
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   694
	 * We create the main space map here, but we don't create the
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   695
	 * allocmaps and freemaps until metaslab_sync_done().  This serves
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   696
	 * two purposes: it allows metaslab_sync_done() to detect the
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   697
	 * addition of new space; and for debugging, it ensures that we'd
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   698
	 * data fault on any attempt to use this metaslab before it's ready.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   699
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   700
	space_map_create(&msp->ms_map, start, size,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   701
	    vd->vdev_ashift, &msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   702
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   703
	metaslab_group_add(mg, msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   704
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   705
	if (metaslab_debug && smo->smo_object != 0) {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   706
		mutex_enter(&msp->ms_lock);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   707
		VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops,
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   708
		    SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   709
		mutex_exit(&msp->ms_lock);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   710
	}
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   711
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   712
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   713
	 * If we're opening an existing pool (txg == 0) or creating
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   714
	 * a new one (txg == TXG_INITIAL), all space is available now.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   715
	 * If we're adding space to an existing pool, the new space
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   716
	 * does not become available until after this txg has synced.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   717
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   718
	if (txg <= TXG_INITIAL)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   719
		metaslab_sync_done(msp, 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   720
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   721
	if (txg != 0) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   722
		vdev_dirty(vd, 0, NULL, txg);
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   723
		vdev_dirty(vd, VDD_METASLAB, msp, txg);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   724
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   725
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   726
	return (msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   727
}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   728
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   729
void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   730
metaslab_fini(metaslab_t *msp)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   731
{
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   732
	metaslab_group_t *mg = msp->ms_group;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   733
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   734
	vdev_space_update(mg->mg_vd,
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   735
	    -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   736
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   737
	metaslab_group_remove(mg, msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   738
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   739
	mutex_enter(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   740
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   741
	space_map_unload(&msp->ms_map);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   742
	space_map_destroy(&msp->ms_map);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   743
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   744
	for (int t = 0; t < TXG_SIZE; t++) {
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   745
		space_map_destroy(&msp->ms_allocmap[t]);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   746
		space_map_destroy(&msp->ms_freemap[t]);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   747
	}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   748
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   749
	for (int t = 0; t < TXG_DEFER_SIZE; t++)
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   750
		space_map_destroy(&msp->ms_defermap[t]);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   751
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   752
	ASSERT3S(msp->ms_deferspace, ==, 0);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   753
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   754
	mutex_exit(&msp->ms_lock);
2856
6f4d5ee1906a 6463348 ZFS code could be more portable
nd150628
parents: 2459
diff changeset
   755
	mutex_destroy(&msp->ms_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   756
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   757
	kmem_free(msp, sizeof (metaslab_t));
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   758
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   759
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   760
/*
 * The top two bits of a metaslab weight are flag bits rather than part of
 * the numeric weight: bit 63 marks a primary activation, bit 62 a secondary
 * one.  METASLAB_ACTIVE_MASK covers both, so a weight with either bit set
 * compares higher than any weight with neither.
 */
#define	METASLAB_WEIGHT_PRIMARY		(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
#define	METASLAB_ACTIVE_MASK		\
	(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   764
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   765
static uint64_t
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   766
metaslab_weight(metaslab_t *msp)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   767
{
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   768
	metaslab_group_t *mg = msp->ms_group;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   769
	space_map_t *sm = &msp->ms_map;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   770
	space_map_obj_t *smo = &msp->ms_smo;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   771
	vdev_t *vd = mg->mg_vd;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   772
	uint64_t weight, space;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   773
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   774
	ASSERT(MUTEX_HELD(&msp->ms_lock));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   775
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   776
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   777
	 * The baseline weight is the metaslab's free space.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   778
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   779
	space = sm->sm_size - smo->smo_alloc;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   780
	weight = space;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   781
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   782
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   783
	 * Modern disks have uniform bit density and constant angular velocity.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   784
	 * Therefore, the outer recording zones are faster (higher bandwidth)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   785
	 * than the inner zones by the ratio of outer to inner track diameter,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   786
	 * which is typically around 2:1.  We account for this by assigning
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   787
	 * higher weight to lower metaslabs (multiplier ranging from 2x to 1x).
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   788
	 * In effect, this means that we'll select the metaslab with the most
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   789
	 * free bandwidth rather than simply the one with the most free space.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   790
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   791
	weight = 2 * weight -
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   792
	    ((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   793
	ASSERT(weight >= space && weight <= 2 * space);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   794
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   795
	/*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   796
	 * For locality, assign higher weight to metaslabs which have
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   797
	 * a lower offset than what we've already activated.
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   798
	 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   799
	if (sm->sm_start <= mg->mg_bonus_area)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   800
		weight *= (metaslab_smo_bonus_pct / 100);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   801
	ASSERT(weight >= space &&
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   802
	    weight <= 2 * (metaslab_smo_bonus_pct / 100) * space);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   803
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   804
	if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   805
		/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   806
		 * If this metaslab is one we're actively using, adjust its
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   807
		 * weight to make it preferable to any inactive metaslab so
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   808
		 * we'll polish it off.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   809
		 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   810
		weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   811
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   812
	return (weight);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   813
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   814
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   815
static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   816
metaslab_prefetch(metaslab_group_t *mg)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   817
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   818
	spa_t *spa = mg->mg_vd->vdev_spa;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   819
	metaslab_t *msp;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   820
	avl_tree_t *t = &mg->mg_metaslab_tree;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   821
	int m;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   822
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   823
	mutex_enter(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   824
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   825
	/*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   826
	 * Prefetch the next potential metaslabs
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   827
	 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   828
	for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   829
		space_map_t *sm = &msp->ms_map;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   830
		space_map_obj_t *smo = &msp->ms_smo;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   831
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   832
		/* If we have reached our prefetch limit then we're done */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   833
		if (m >= metaslab_prefetch_limit)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   834
			break;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   835
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   836
		if (!sm->sm_loaded && smo->smo_object != 0) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   837
			mutex_exit(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   838
			dmu_prefetch(spa_meta_objset(spa), smo->smo_object,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   839
			    0ULL, smo->smo_objsize);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   840
			mutex_enter(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   841
		}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   842
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   843
	mutex_exit(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   844
}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   845
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   846
static int
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   847
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   848
{
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   849
	metaslab_group_t *mg = msp->ms_group;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   850
	space_map_t *sm = &msp->ms_map;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   851
	space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   852
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   853
	ASSERT(MUTEX_HELD(&msp->ms_lock));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   854
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   855
	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   856
		space_map_load_wait(sm);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   857
		if (!sm->sm_loaded) {
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   858
			int error = space_map_load(sm, sm_ops, SM_FREE,
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   859
			    &msp->ms_smo,
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   860
			    spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   861
			if (error)  {
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   862
				metaslab_group_sort(msp->ms_group, msp, 0);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   863
				return (error);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   864
			}
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   865
			for (int t = 0; t < TXG_DEFER_SIZE; t++)
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   866
				space_map_walk(&msp->ms_defermap[t],
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   867
				    space_map_claim, sm);
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   868
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   869
		}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   870
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   871
		/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   872
		 * Track the bonus area as we activate new metaslabs.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   873
		 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   874
		if (sm->sm_start > mg->mg_bonus_area) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   875
			mutex_enter(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   876
			mg->mg_bonus_area = sm->sm_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
   877
			mutex_exit(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   878
		}
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   879
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   880
		/*
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   881
		 * If we were able to load the map then make sure
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   882
		 * that this map is still able to satisfy our request.
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   883
		 */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   884
		if (msp->ms_weight < size)
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   885
			return (ENOSPC);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
   886
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   887
		metaslab_group_sort(msp->ms_group, msp,
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   888
		    msp->ms_weight | activation_weight);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   889
	}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   890
	ASSERT(sm->sm_loaded);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   891
	ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   892
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   893
	return (0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   894
}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   895
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   896
static void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   897
metaslab_passivate(metaslab_t *msp, uint64_t size)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   898
{
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   899
	/*
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   900
	 * If size < SPA_MINBLOCKSIZE, then we will not allocate from
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   901
	 * this metaslab again.  In that case, it had better be empty,
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   902
	 * or we would be leaving space on the table.
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   903
	 */
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
   904
	ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   905
	metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
   906
	ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   907
}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   908
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   909
/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   910
 * Write a metaslab to disk in the context of the specified transaction group.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   911
 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   912
void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   913
metaslab_sync(metaslab_t *msp, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   914
{
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   915
	vdev_t *vd = msp->ms_group->mg_vd;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   916
	spa_t *spa = vd->vdev_spa;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
   917
	objset_t *mos = spa_meta_objset(spa);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   918
	space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   919
	space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   920
	space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   921
	space_map_t *sm = &msp->ms_map;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   922
	space_map_obj_t *smo = &msp->ms_smo_syncing;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   923
	dmu_buf_t *db;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   924
	dmu_tx_t *tx;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   925
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   926
	ASSERT(!vd->vdev_ishole);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
   927
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   928
	if (allocmap->sm_space == 0 && freemap->sm_space == 0)
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   929
		return;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   930
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   931
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   932
	 * The only state that can actually be changing concurrently with
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   933
	 * metaslab_sync() is the metaslab's ms_map.  No other thread can
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   934
	 * be modifying this txg's allocmap, freemap, freed_map, or smo.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   935
	 * Therefore, we only hold ms_lock to satify space_map ASSERTs.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   936
	 * We drop it whenever we call into the DMU, because the DMU
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   937
	 * can call down to us (e.g. via zio_free()) at any time.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   938
	 */
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   939
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   940
	tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   941
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   942
	if (smo->smo_object == 0) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   943
		ASSERT(smo->smo_objsize == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   944
		ASSERT(smo->smo_alloc == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   945
		smo->smo_object = dmu_object_alloc(mos,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   946
		    DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   947
		    DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   948
		ASSERT(smo->smo_object != 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   949
		dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   950
		    (sm->sm_start >> vd->vdev_ms_shift),
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   951
		    sizeof (uint64_t), &smo->smo_object, tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   952
	}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   953
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   954
	mutex_enter(&msp->ms_lock);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   955
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   956
	space_map_walk(freemap, space_map_add, freed_map);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   957
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   958
	if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   959
	    2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   960
		/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   961
		 * The in-core space map representation is twice as compact
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   962
		 * as the on-disk one, so it's time to condense the latter
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   963
		 * by generating a pure allocmap from first principles.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   964
		 *
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   965
		 * This metaslab is 100% allocated,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   966
		 * minus the content of the in-core map (sm),
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   967
		 * minus what's been freed this txg (freed_map),
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   968
		 * minus deferred frees (ms_defermap[]),
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   969
		 * minus allocations from txgs in the future
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   970
		 * (because they haven't been committed yet).
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   971
		 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   972
		space_map_vacate(allocmap, NULL, NULL);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   973
		space_map_vacate(freemap, NULL, NULL);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   974
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   975
		space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   976
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   977
		space_map_walk(sm, space_map_remove, allocmap);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   978
		space_map_walk(freed_map, space_map_remove, allocmap);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   979
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   980
		for (int t = 0; t < TXG_DEFER_SIZE; t++)
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   981
			space_map_walk(&msp->ms_defermap[t],
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   982
			    space_map_remove, allocmap);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   983
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
   984
		for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   985
			space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   986
			    space_map_remove, allocmap);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   987
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   988
		mutex_exit(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   989
		space_map_truncate(smo, mos, tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   990
		mutex_enter(&msp->ms_lock);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
   991
	}
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   992
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   993
	space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   994
	space_map_sync(freemap, SM_FREE, smo, mos, tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   995
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   996
	mutex_exit(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   997
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   998
	VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
   999
	dmu_buf_will_dirty(db, tx);
4944
96d96f8de974 6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents: 4527
diff changeset
  1000
	ASSERT3U(db->db_size, >=, sizeof (*smo));
96d96f8de974 6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents: 4527
diff changeset
  1001
	bcopy(smo, db->db_data, sizeof (*smo));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1002
	dmu_buf_rele(db, FTAG);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1003
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1004
	dmu_tx_commit(tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1005
}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1006
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1007
/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1008
 * Called after a transaction group has completely synced to mark
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1009
 * all of the metaslab's free space as usable.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1010
 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1011
void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1012
metaslab_sync_done(metaslab_t *msp, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1013
{
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1014
	space_map_obj_t *smo = &msp->ms_smo;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1015
	space_map_obj_t *smosync = &msp->ms_smo_syncing;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1016
	space_map_t *sm = &msp->ms_map;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1017
	space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1018
	space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1019
	metaslab_group_t *mg = msp->ms_group;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1020
	vdev_t *vd = mg->mg_vd;
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1021
	int64_t alloc_delta, defer_delta;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1022
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1023
	ASSERT(!vd->vdev_ishole);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1024
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1025
	mutex_enter(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1026
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1027
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1028
	 * If this metaslab is just becoming available, initialize its
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1029
	 * allocmaps and freemaps and add its capacity to the vdev.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1030
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1031
	if (freed_map->sm_size == 0) {
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1032
		for (int t = 0; t < TXG_SIZE; t++) {
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1033
			space_map_create(&msp->ms_allocmap[t], sm->sm_start,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1034
			    sm->sm_size, sm->sm_shift, sm->sm_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1035
			space_map_create(&msp->ms_freemap[t], sm->sm_start,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1036
			    sm->sm_size, sm->sm_shift, sm->sm_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1037
		}
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1038
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1039
		for (int t = 0; t < TXG_DEFER_SIZE; t++)
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1040
			space_map_create(&msp->ms_defermap[t], sm->sm_start,
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1041
			    sm->sm_size, sm->sm_shift, sm->sm_lock);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1042
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1043
		vdev_space_update(vd, 0, 0, sm->sm_size);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1044
	}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1045
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1046
	alloc_delta = smosync->smo_alloc - smo->smo_alloc;
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1047
	defer_delta = freed_map->sm_space - defer_map->sm_space;
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1048
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1049
	vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1050
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1051
	ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1052
	ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1053
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1054
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1055
	 * If there's a space_map_load() in progress, wait for it to complete
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1056
	 * so that we have a consistent view of the in-core space map.
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1057
	 * Then, add defer_map (oldest deferred frees) to this map and
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1058
	 * transfer freed_map (this txg's frees) to defer_map.
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1059
	 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1060
	space_map_load_wait(sm);
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1061
	space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1062
	space_map_vacate(freed_map, space_map_add, defer_map);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1063
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1064
	*smo = *smosync;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1065
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1066
	msp->ms_deferspace += defer_delta;
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1067
	ASSERT3S(msp->ms_deferspace, >=, 0);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1068
	ASSERT3S(msp->ms_deferspace, <=, sm->sm_size);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1069
	if (msp->ms_deferspace != 0) {
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1070
		/*
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1071
		 * Keep syncing this metaslab until all deferred frees
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1072
		 * are back in circulation.
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1073
		 */
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1074
		vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1075
	}
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1076
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1077
	/*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1078
	 * If the map is loaded but no longer active, evict it as soon as all
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1079
	 * future allocations have synced.  (If we unloaded it now and then
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1080
	 * loaded a moment later, the map wouldn't reflect those allocations.)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1081
	 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1082
	if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1083
		int evictable = 1;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1084
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
  1085
		for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1086
			if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1087
				evictable = 0;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1088
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1089
		if (evictable && !metaslab_debug)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1090
			space_map_unload(sm);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1091
	}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1092
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1093
	metaslab_group_sort(mg, msp, metaslab_weight(msp));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1094
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1095
	mutex_exit(&msp->ms_lock);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1096
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1097
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1098
void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1099
metaslab_sync_reassess(metaslab_group_t *mg)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1100
{
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1101
	vdev_t *vd = mg->mg_vd;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1102
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1103
	/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1104
	 * Re-evaluate all metaslabs which have lower offsets than the
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1105
	 * bonus area.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1106
	 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1107
	for (int m = 0; m < vd->vdev_ms_count; m++) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1108
		metaslab_t *msp = vd->vdev_ms[m];
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1109
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1110
		if (msp->ms_map.sm_start > mg->mg_bonus_area)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1111
			break;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1112
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1113
		mutex_enter(&msp->ms_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1114
		metaslab_group_sort(mg, msp, metaslab_weight(msp));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1115
		mutex_exit(&msp->ms_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1116
	}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1117
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1118
	/*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1119
	 * Prefetch the next potential metaslabs
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1120
	 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1121
	metaslab_prefetch(mg);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1122
}
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1123
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1124
static uint64_t
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1125
metaslab_distance(metaslab_t *msp, dva_t *dva)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1126
{
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1127
	uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1128
	uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1129
	uint64_t start = msp->ms_map.sm_start >> ms_shift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1130
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1131
	if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1132
		return (1ULL << 63);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1133
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1134
	if (offset < start)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1135
		return ((start - offset) << ms_shift);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1136
	if (offset > start)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1137
		return ((offset - start) << ms_shift);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1138
	return (0);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1139
}
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1140
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1141
static uint64_t
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1142
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1143
    uint64_t min_distance, dva_t *dva, int d)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1144
{
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1145
	metaslab_t *msp = NULL;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1146
	uint64_t offset = -1ULL;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1147
	avl_tree_t *t = &mg->mg_metaslab_tree;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1148
	uint64_t activation_weight;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1149
	uint64_t target_distance;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1150
	int i;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1151
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1152
	activation_weight = METASLAB_WEIGHT_PRIMARY;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1153
	for (i = 0; i < d; i++) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1154
		if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1155
			activation_weight = METASLAB_WEIGHT_SECONDARY;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1156
			break;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1157
		}
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1158
	}
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1159
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1160
	for (;;) {
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1161
		boolean_t was_active;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1162
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1163
		mutex_enter(&mg->mg_lock);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1164
		for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1165
			if (msp->ms_weight < size) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1166
				mutex_exit(&mg->mg_lock);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1167
				return (-1ULL);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1168
			}
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1169
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1170
			was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1171
			if (activation_weight == METASLAB_WEIGHT_PRIMARY)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1172
				break;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1173
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1174
			target_distance = min_distance +
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1175
			    (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1176
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1177
			for (i = 0; i < d; i++)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1178
				if (metaslab_distance(msp, &dva[i]) <
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1179
				    target_distance)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1180
					break;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1181
			if (i == d)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1182
				break;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1183
		}
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1184
		mutex_exit(&mg->mg_lock);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1185
		if (msp == NULL)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1186
			return (-1ULL);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1187
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1188
		mutex_enter(&msp->ms_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1189
3848
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1190
		/*
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1191
		 * Ensure that the metaslab we have selected is still
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1192
		 * capable of handling our request. It's possible that
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1193
		 * another thread may have changed the weight while we
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1194
		 * were blocked on the metaslab lock.
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1195
		 */
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1196
		if (msp->ms_weight < size || (was_active &&
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1197
		    !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1198
		    activation_weight == METASLAB_WEIGHT_PRIMARY)) {
3848
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1199
			mutex_exit(&msp->ms_lock);
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1200
			continue;
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1201
		}
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
  1202
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1203
		if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) &&
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1204
		    activation_weight == METASLAB_WEIGHT_PRIMARY) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1205
			metaslab_passivate(msp,
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
  1206
			    msp->ms_weight & ~METASLAB_ACTIVE_MASK);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1207
			mutex_exit(&msp->ms_lock);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1208
			continue;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1209
		}
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1210
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1211
		if (metaslab_activate(msp, activation_weight, size) != 0) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1212
			mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1213
			continue;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1214
		}
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1215
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1216
		if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1217
			break;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1218
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
  1219
		metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1220
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1221
		mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1222
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1223
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1224
	if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1225
		vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1226
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1227
	space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1228
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1229
	mutex_exit(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1230
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1231
	return (offset);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1232
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1233
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1234
/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1235
 * Allocate a block for the specified i/o.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1236
 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1237
static int
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1238
metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1239
    dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1240
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1241
	metaslab_group_t *mg, *rotor;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1242
	vdev_t *vd;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1243
	int dshift = 3;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1244
	int all_zero;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1245
	int zio_lock = B_FALSE;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1246
	boolean_t allocatable;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1247
	uint64_t offset = -1ULL;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1248
	uint64_t asize;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1249
	uint64_t distance;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1250
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1251
	ASSERT(!DVA_IS_VALID(&dva[d]));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1252
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1253
	/*
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
  1254
	 * For testing, make some blocks above a certain size be gang blocks.
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
  1255
	 */
11066
cebb50cbe4f9 PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents: 11026
diff changeset
  1256
	if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
  1257
		return (ENOSPC);
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
  1258
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
  1259
	/*
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1260
	 * Start at the rotor and loop through all mgs until we find something.
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1261
	 * Note that there's no locking on mc_rotor or mc_aliquot because
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1262
	 * nothing actually breaks if we miss a few updates -- we just won't
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1263
	 * allocate quite as evenly.  It all balances out over time.
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1264
	 *
3063
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
  1265
	 * If we are doing ditto or log blocks, try to spread them across
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
  1266
	 * consecutive vdevs.  If we're forced to reuse a vdev before we've
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
  1267
	 * allocated all of our ditto blocks, then try and spread them out on
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
  1268
	 * that vdev as much as possible.  If it turns out to not be possible,
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1269
	 * gradually lower our standards until anything becomes acceptable.
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1270
	 * Also, allocating on consecutive vdevs (as opposed to random vdevs)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1271
	 * gives us hope of containing our fault domains to something we're
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1272
	 * able to reason about.  Otherwise, any two top-level vdev failures
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1273
	 * will guarantee the loss of data.  With consecutive allocation,
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1274
	 * only two adjacent top-level vdev failures will result in data loss.
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1275
	 *
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1276
	 * If we are doing gang blocks (hintdva is non-NULL), try to keep
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1277
	 * ourselves on the same vdev as our gang block header.  That
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1278
	 * way, we can hope for locality in vdev_cache, plus it makes our
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1279
	 * fault domains something tractable.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1280
	 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1281
	if (hintdva) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1282
		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1283
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1284
		/*
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1285
		 * It's possible the vdev we're using as the hint no
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1286
		 * longer exists (i.e. removed). Consult the rotor when
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1287
		 * all else fails.
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1288
		 */
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1289
		if (vd != NULL) {
3063
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
  1290
			mg = vd->vdev_mg;
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1291
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1292
			if (flags & METASLAB_HINTBP_AVOID &&
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1293
			    mg->mg_next != NULL)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1294
				mg = mg->mg_next;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1295
		} else {
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1296
			mg = mc->mc_rotor;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
  1297
		}
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1298
	} else if (d != 0) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1299
		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1300
		mg = vd->vdev_mg->mg_next;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1301
	} else {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1302
		mg = mc->mc_rotor;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1303
	}
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1304
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1305
	/*
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1306
	 * If the hint put us into the wrong metaslab class, or into a
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1307
	 * metaslab group that has been passivated, just follow the rotor.
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1308
	 */
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1309
	if (mg->mg_class != mc || mg->mg_activation_count <= 0)
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1310
		mg = mc->mc_rotor;
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1311
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1312
	rotor = mg;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1313
top:
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1314
	all_zero = B_TRUE;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1315
	do {
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1316
		ASSERT(mg->mg_activation_count == 1);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
  1317
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1318
		vd = mg->mg_vd;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1319
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1320
		/*
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1321
		 * Don't allocate from faulted devices.
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1322
		 */
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1323
		if (zio_lock) {
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1324
			spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1325
			allocatable = vdev_allocatable(vd);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1326
			spa_config_exit(spa, SCL_ZIO, FTAG);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1327
		} else {
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1328
			allocatable = vdev_allocatable(vd);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1329
		}
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1330
		if (!allocatable)
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1331
			goto next;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1332
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1333
		/*
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1334
		 * Avoid writing single-copy data to a failing vdev
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1335
		 */
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1336
		if ((vd->vdev_stat.vs_write_errors > 0 ||
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1337
		    vd->vdev_state < VDEV_STATE_HEALTHY) &&
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1338
		    d == 0 && dshift == 3) {
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1339
			all_zero = B_FALSE;
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1340
			goto next;
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1341
		}
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1342
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1343
		ASSERT(mg->mg_class == mc);
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1344
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1345
		distance = vd->vdev_asize >> dshift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1346
		if (distance <= (1ULL << vd->vdev_ms_shift))
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1347
			distance = 0;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1348
		else
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1349
			all_zero = B_FALSE;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1350
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1351
		asize = vdev_psize_to_asize(vd, psize);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1352
		ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1353
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1354
		offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1355
		if (offset != -1ULL) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1356
			/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1357
			 * If we've just selected this metaslab group,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1358
			 * figure out whether the corresponding vdev is
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1359
			 * over- or under-used relative to the pool,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1360
			 * and set an allocation bias to even it out.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1361
			 */
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1362
			if (mc->mc_aliquot == 0) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1363
				vdev_stat_t *vs = &vd->vdev_stat;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1364
				int64_t vu, cu;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1365
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1366
				/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1367
				 * Determine percent used in units of 0..1024.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1368
				 * (This is just to avoid floating point.)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1369
				 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1370
				vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1371
				cu = (mc->mc_alloc << 10) / (mc->mc_space + 1);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1372
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1373
				/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1374
				 * Bias by at most +/- 25% of the aliquot.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1375
				 */
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1376
				mg->mg_bias = ((cu - vu) *
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1377
				    (int64_t)mg->mg_aliquot) / (1024 * 4);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1378
			}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1379
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1380
			if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1381
			    mg->mg_aliquot + mg->mg_bias) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1382
				mc->mc_rotor = mg->mg_next;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1383
				mc->mc_aliquot = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1384
			}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1385
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1386
			DVA_SET_VDEV(&dva[d], vd->vdev_id);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1387
			DVA_SET_OFFSET(&dva[d], offset);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1388
			DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER));
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1389
			DVA_SET_ASIZE(&dva[d], asize);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1390
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1391
			return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1392
		}
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
  1393
next:
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1394
		mc->mc_rotor = mg->mg_next;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1395
		mc->mc_aliquot = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1396
	} while ((mg = mg->mg_next) != rotor);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1397
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1398
	if (!all_zero) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1399
		dshift++;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1400
		ASSERT(dshift < 64);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1401
		goto top;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1402
	}
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1403
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
  1404
	if (!allocatable && !zio_lock) {
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1405
		dshift = 3;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1406
		zio_lock = B_TRUE;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1407
		goto top;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1408
	}
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1409
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
  1410
	bzero(&dva[d], sizeof (dva_t));
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1411
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1412
	return (ENOSPC);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1413
}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1414
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1415
/*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1416
 * Free the block represented by DVA in the context of the specified
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1417
 * transaction group.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1418
 */
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1419
static void
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1420
metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1421
{
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1422
	uint64_t vdev = DVA_GET_VDEV(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1423
	uint64_t offset = DVA_GET_OFFSET(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1424
	uint64_t size = DVA_GET_ASIZE(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1425
	vdev_t *vd;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1426
	metaslab_t *msp;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1427
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1428
	ASSERT(DVA_IS_VALID(dva));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1429
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1430
	if (txg > spa_freeze_txg(spa))
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1431
		return;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1432
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1433
	if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1434
	    (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1435
		cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu",
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1436
		    (u_longlong_t)vdev, (u_longlong_t)offset);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1437
		ASSERT(0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1438
		return;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1439
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1440
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1441
	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1442
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1443
	if (DVA_GET_GANG(dva))
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1444
		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1445
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1446
	mutex_enter(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1447
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1448
	if (now) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1449
		space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1450
		    offset, size);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1451
		space_map_free(&msp->ms_map, offset, size);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1452
	} else {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1453
		if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1454
			vdev_dirty(vd, VDD_METASLAB, msp, txg);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
  1455
		space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1456
	}
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1457
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1458
	mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
  1459
}
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1460
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1461
/*
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1462
 * Intent log support: upon opening the pool after a crash, notify the SPA
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1463
 * of blocks that the intent log has allocated for immediate write, but
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1464
 * which are still considered free by the SPA because the last transaction
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1465
 * group didn't commit yet.
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1466
 */
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1467
static int
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1468
metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1469
{
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1470
	uint64_t vdev = DVA_GET_VDEV(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1471
	uint64_t offset = DVA_GET_OFFSET(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1472
	uint64_t size = DVA_GET_ASIZE(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1473
	vdev_t *vd;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1474
	metaslab_t *msp;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1475
	int error = 0;
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1476
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1477
	ASSERT(DVA_IS_VALID(dva));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1478
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1479
	if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1480
	    (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1481
		return (ENXIO);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1482
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1483
	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1484
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1485
	if (DVA_GET_GANG(dva))
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1486
		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1487
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1488
	mutex_enter(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1489
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1490
	if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1491
		error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1492
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1493
	if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1494
		error = ENOENT;
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1495
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1496
	if (error || txg == 0) {	/* txg == 0 indicates dry run */
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1497
		mutex_exit(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1498
		return (error);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1499
	}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1500
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1501
	space_map_claim(&msp->ms_map, offset, size);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1502
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
  1503
	if (spa_writeable(spa)) {	/* don't dirty if we're zdb(1M) */
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1504
		if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1505
			vdev_dirty(vd, VDD_METASLAB, msp, txg);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1506
		space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1507
	}
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1508
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1509
	mutex_exit(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1510
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1511
	return (0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1512
}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1513
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1514
int
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1515
metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1516
    int ndvas, uint64_t txg, blkptr_t *hintbp, int flags)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1517
{
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1518
	dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1519
	dva_t *hintdva = hintbp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1520
	int error = 0;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1521
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1522
	ASSERT(bp->blk_birth == 0);
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1523
	ASSERT(BP_PHYSICAL_BIRTH(bp) == 0);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1524
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1525
	spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1526
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1527
	if (mc->mc_rotor == NULL) {	/* no vdevs in this class */
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1528
		spa_config_exit(spa, SCL_ALLOC, FTAG);
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1529
		return (ENOSPC);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1530
	}
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1531
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1532
	ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1533
	ASSERT(BP_GET_NDVAS(bp) == 0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1534
	ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1535
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1536
	for (int d = 0; d < ndvas; d++) {
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
  1537
		error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1538
		    txg, flags);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1539
		if (error) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1540
			for (d--; d >= 0; d--) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1541
				metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1542
				bzero(&dva[d], sizeof (dva_t));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1543
			}
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1544
			spa_config_exit(spa, SCL_ALLOC, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1545
			return (error);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1546
		}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1547
	}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1548
	ASSERT(error == 0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1549
	ASSERT(BP_GET_NDVAS(bp) == ndvas);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1550
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1551
	spa_config_exit(spa, SCL_ALLOC, FTAG);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1552
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1553
	BP_SET_BIRTH(bp, txg, txg);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1554
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1555
	return (0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1556
}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1557
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1558
void
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1559
metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1560
{
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1561
	const dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1562
	int ndvas = BP_GET_NDVAS(bp);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1563
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1564
	ASSERT(!BP_IS_HOLE(bp));
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
  1565
	ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa));
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1566
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1567
	spa_config_enter(spa, SCL_FREE, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1568
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1569
	for (int d = 0; d < ndvas; d++)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1570
		metaslab_free_dva(spa, &dva[d], txg, now);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1571
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1572
	spa_config_exit(spa, SCL_FREE, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1573
}
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1574
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1575
int
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1576
metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1577
{
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1578
	const dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1579
	int ndvas = BP_GET_NDVAS(bp);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1580
	int error = 0;
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1581
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1582
	ASSERT(!BP_IS_HOLE(bp));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1583
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1584
	if (txg != 0) {
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1585
		/*
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1586
		 * First do a dry run to make sure all DVAs are claimable,
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1587
		 * so we don't have to unwind from partial failures below.
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1588
		 */
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1589
		if ((error = metaslab_claim(spa, bp, 0)) != 0)
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1590
			return (error);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1591
	}
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1592
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1593
	spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1594
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1595
	for (int d = 0; d < ndvas; d++)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1596
		if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0)
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1597
			break;
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1598
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1599
	spa_config_exit(spa, SCL_ALLOC, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1600
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1601
	ASSERT(error == 0 || txg == 0);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1602
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
  1603
	return (error);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
  1604
}