author | Chris Kirby <Chris.Kirby@oracle.com> |
Thu, 10 Jun 2010 15:46:47 -0600 | |
changeset 12605 | 6790e683d5a5 |
parent 12047 | 7c1fcc8419ca |
child 13379 | 4df42cc92254 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
22 |
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
789 | 23 |
*/ |
24 |
||
25 |
#include <sys/zfs_context.h> |
|
26 |
#include <sys/dmu.h> |
|
27 |
#include <sys/dmu_tx.h> |
|
28 |
#include <sys/space_map.h> |
|
29 |
#include <sys/metaslab_impl.h> |
|
30 |
#include <sys/vdev_impl.h> |
|
31 |
#include <sys/zio.h> |
|
32 |
||
2391 | 33 |
uint64_t metaslab_aliquot = 512ULL << 10; |
5530 | 34 |
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ |
2391 | 35 |
|
789 | 36 |
/* |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
37 |
* Metaslab debugging: when set, keeps all space maps in core to verify frees. |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
38 |
*/ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
39 |
static int metaslab_debug = 0; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
40 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
41 |
/* |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
42 |
* Minimum size which forces the dynamic allocator to change |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
43 |
* its allocation strategy. Once the space map cannot satisfy |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
44 |
* an allocation of this size then it switches to using a more |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
45 |
* aggressive strategy (i.e., search by size rather than offset). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
46 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
47 |
uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
48 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
49 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
50 |
* The minimum free space, in percent, which must be available |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
51 |
* in a space map to continue allocations in a first-fit fashion. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
52 |
* Once the space_map's free space drops below this level we dynamically |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
53 |
* switch to using best-fit allocations. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
54 |
*/ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
55 |
int metaslab_df_free_pct = 4; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
56 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
57 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
58 |
* A metaslab is considered "free" if it contains a contiguous |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
59 |
* segment which is greater than metaslab_min_alloc_size. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
60 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
61 |
uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
62 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
63 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
64 |
* Max number of space_maps to prefetch. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
65 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
66 |
int metaslab_prefetch_limit = SPA_DVAS_PER_BP; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
67 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
68 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
69 |
* Percentage bonus multiplier for metaslabs that are in the bonus area. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
70 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
71 |
int metaslab_smo_bonus_pct = 150; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
72 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
73 |
/* |
789 | 74 |
* ========================================================================== |
75 |
* Metaslab classes |
|
76 |
* ========================================================================== |
|
77 |
*/ |
|
78 |
metaslab_class_t * |
|
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
79 |
metaslab_class_create(spa_t *spa, space_map_ops_t *ops) |
789 | 80 |
{ |
81 |
metaslab_class_t *mc; |
|
82 |
||
83 |
mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); |
|
84 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
85 |
mc->mc_spa = spa; |
789 | 86 |
mc->mc_rotor = NULL; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
87 |
mc->mc_ops = ops; |
789 | 88 |
|
89 |
return (mc); |
|
90 |
} |
|
91 |
||
92 |
void |
|
93 |
metaslab_class_destroy(metaslab_class_t *mc) |
|
94 |
{ |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
95 |
ASSERT(mc->mc_rotor == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
96 |
ASSERT(mc->mc_alloc == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
97 |
ASSERT(mc->mc_deferred == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
98 |
ASSERT(mc->mc_space == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
99 |
ASSERT(mc->mc_dspace == 0); |
789 | 100 |
|
101 |
kmem_free(mc, sizeof (metaslab_class_t)); |
|
102 |
} |
|
103 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
104 |
int |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
105 |
metaslab_class_validate(metaslab_class_t *mc) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
106 |
{ |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
107 |
metaslab_group_t *mg; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
108 |
vdev_t *vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
109 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
110 |
/* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
111 |
* Must hold one of the spa_config locks. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
112 |
*/ |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
113 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
114 |
spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
115 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
116 |
if ((mg = mc->mc_rotor) == NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
117 |
return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
118 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
119 |
do { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
120 |
vd = mg->mg_vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
121 |
ASSERT(vd->vdev_mg != NULL); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
122 |
ASSERT3P(vd->vdev_top, ==, vd); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
123 |
ASSERT3P(mg->mg_class, ==, mc); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
124 |
ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
125 |
} while ((mg = mg->mg_next) != mc->mc_rotor); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
126 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
127 |
return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
128 |
} |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
129 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
130 |
void |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
131 |
metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
132 |
int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
133 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
134 |
atomic_add_64(&mc->mc_alloc, alloc_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
135 |
atomic_add_64(&mc->mc_deferred, defer_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
136 |
atomic_add_64(&mc->mc_space, space_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
137 |
atomic_add_64(&mc->mc_dspace, dspace_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
138 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
139 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
140 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
141 |
metaslab_class_get_alloc(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
142 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
143 |
return (mc->mc_alloc); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
144 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
145 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
146 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
147 |
metaslab_class_get_deferred(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
148 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
149 |
return (mc->mc_deferred); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
150 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
151 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
152 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
153 |
metaslab_class_get_space(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
154 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
155 |
return (mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
156 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
157 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
158 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
159 |
metaslab_class_get_dspace(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
160 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
161 |
return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
162 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
163 |
|
789 | 164 |
/* |
165 |
* ========================================================================== |
|
166 |
* Metaslab groups |
|
167 |
* ========================================================================== |
|
168 |
*/ |
|
169 |
static int |
|
170 |
metaslab_compare(const void *x1, const void *x2) |
|
171 |
{ |
|
172 |
const metaslab_t *m1 = x1; |
|
173 |
const metaslab_t *m2 = x2; |
|
174 |
||
175 |
if (m1->ms_weight < m2->ms_weight) |
|
176 |
return (1); |
|
177 |
if (m1->ms_weight > m2->ms_weight) |
|
178 |
return (-1); |
|
179 |
||
180 |
/* |
|
181 |
* If the weights are identical, use the offset to force uniqueness. |
|
182 |
*/ |
|
183 |
if (m1->ms_map.sm_start < m2->ms_map.sm_start) |
|
184 |
return (-1); |
|
185 |
if (m1->ms_map.sm_start > m2->ms_map.sm_start) |
|
186 |
return (1); |
|
187 |
||
188 |
ASSERT3P(m1, ==, m2); |
|
189 |
||
190 |
return (0); |
|
191 |
} |
|
192 |
||
193 |
metaslab_group_t * |
|
194 |
metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) |
|
195 |
{ |
|
196 |
metaslab_group_t *mg; |
|
197 |
||
198 |
mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); |
|
199 |
mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); |
|
200 |
avl_create(&mg->mg_metaslab_tree, metaslab_compare, |
|
201 |
sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); |
|
202 |
mg->mg_vd = vd; |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
203 |
mg->mg_class = mc; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
204 |
mg->mg_activation_count = 0; |
789 | 205 |
|
206 |
return (mg); |
|
207 |
} |
|
208 |
||
209 |
void |
|
210 |
metaslab_group_destroy(metaslab_group_t *mg) |
|
211 |
{ |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
212 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
213 |
ASSERT(mg->mg_next == NULL); |
11026
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
214 |
/* |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
215 |
* We may have gone below zero with the activation count |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
216 |
* either because we never activated in the first place or |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
217 |
* because we're done, and possibly removing the vdev. |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
218 |
*/ |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
219 |
ASSERT(mg->mg_activation_count <= 0); |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
220 |
|
789 | 221 |
avl_destroy(&mg->mg_metaslab_tree); |
222 |
mutex_destroy(&mg->mg_lock); |
|
223 |
kmem_free(mg, sizeof (metaslab_group_t)); |
|
224 |
} |
|
225 |
||
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
226 |
void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
227 |
metaslab_group_activate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
228 |
{ |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
229 |
metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
230 |
metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
231 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
232 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
233 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
234 |
ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
235 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
236 |
ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
237 |
ASSERT(mg->mg_activation_count <= 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
238 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
239 |
if (++mg->mg_activation_count <= 0) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
240 |
return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
241 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
242 |
mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
243 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
244 |
if ((mgprev = mc->mc_rotor) == NULL) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
245 |
mg->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
246 |
mg->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
247 |
} else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
248 |
mgnext = mgprev->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
249 |
mg->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
250 |
mg->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
251 |
mgprev->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
252 |
mgnext->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
253 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
254 |
mc->mc_rotor = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
255 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
256 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
257 |
void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
258 |
metaslab_group_passivate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
259 |
{ |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
260 |
metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
261 |
metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
262 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
263 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
264 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
265 |
if (--mg->mg_activation_count != 0) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
266 |
ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
267 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
268 |
ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
269 |
ASSERT(mg->mg_activation_count < 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
270 |
return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
271 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
272 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
273 |
mgprev = mg->mg_prev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
274 |
mgnext = mg->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
275 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
276 |
if (mg == mgnext) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
277 |
mc->mc_rotor = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
278 |
} else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
279 |
mc->mc_rotor = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
280 |
mgprev->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
281 |
mgnext->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
282 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
283 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
284 |
mg->mg_prev = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
285 |
mg->mg_next = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
286 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
287 |
|
1732 | 288 |
static void |
289 |
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) |
|
789 | 290 |
{ |
291 |
mutex_enter(&mg->mg_lock); |
|
292 |
ASSERT(msp->ms_group == NULL); |
|
293 |
msp->ms_group = mg; |
|
1732 | 294 |
msp->ms_weight = 0; |
789 | 295 |
avl_add(&mg->mg_metaslab_tree, msp); |
296 |
mutex_exit(&mg->mg_lock); |
|
297 |
} |
|
298 |
||
1732 | 299 |
static void |
789 | 300 |
metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) |
301 |
{ |
|
302 |
mutex_enter(&mg->mg_lock); |
|
303 |
ASSERT(msp->ms_group == mg); |
|
304 |
avl_remove(&mg->mg_metaslab_tree, msp); |
|
305 |
msp->ms_group = NULL; |
|
306 |
mutex_exit(&mg->mg_lock); |
|
307 |
} |
|
308 |
||
1732 | 309 |
static void |
789 | 310 |
metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) |
311 |
{ |
|
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
312 |
/* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
313 |
* Although in principle the weight can be any value, in |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
314 |
* practice we do not use values in the range [1, 510]. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
315 |
*/ |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
316 |
ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0); |
1732 | 317 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
318 |
||
789 | 319 |
mutex_enter(&mg->mg_lock); |
320 |
ASSERT(msp->ms_group == mg); |
|
321 |
avl_remove(&mg->mg_metaslab_tree, msp); |
|
322 |
msp->ms_weight = weight; |
|
323 |
avl_add(&mg->mg_metaslab_tree, msp); |
|
324 |
mutex_exit(&mg->mg_lock); |
|
325 |
} |
|
326 |
||
327 |
/* |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
328 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
329 |
* Common allocator routines |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
330 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
331 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
332 |
static int |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
333 |
metaslab_segsize_compare(const void *x1, const void *x2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
334 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
335 |
const space_seg_t *s1 = x1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
336 |
const space_seg_t *s2 = x2; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
337 |
uint64_t ss_size1 = s1->ss_end - s1->ss_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
338 |
uint64_t ss_size2 = s2->ss_end - s2->ss_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
339 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
340 |
if (ss_size1 < ss_size2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
341 |
return (-1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
342 |
if (ss_size1 > ss_size2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
343 |
return (1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
344 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
345 |
if (s1->ss_start < s2->ss_start) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
346 |
return (-1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
347 |
if (s1->ss_start > s2->ss_start) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
348 |
return (1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
349 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
350 |
return (0); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
351 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
352 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
353 |
/* |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
354 |
* This is a helper function that can be used by the allocator to find |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
355 |
* a suitable block to allocate. This will search the specified AVL |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
356 |
* tree looking for a block that matches the specified criteria. |
789 | 357 |
*/ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
358 |
static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
359 |
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
360 |
uint64_t align) |
789 | 361 |
{ |
362 |
space_seg_t *ss, ssearch; |
|
363 |
avl_index_t where; |
|
364 |
||
365 |
ssearch.ss_start = *cursor; |
|
366 |
ssearch.ss_end = *cursor + size; |
|
367 |
||
368 |
ss = avl_find(t, &ssearch, &where); |
|
369 |
if (ss == NULL) |
|
370 |
ss = avl_nearest(t, where, AVL_AFTER); |
|
371 |
||
372 |
while (ss != NULL) { |
|
373 |
uint64_t offset = P2ROUNDUP(ss->ss_start, align); |
|
374 |
||
375 |
if (offset + size <= ss->ss_end) { |
|
376 |
*cursor = offset + size; |
|
377 |
return (offset); |
|
378 |
} |
|
379 |
ss = AVL_NEXT(t, ss); |
|
380 |
} |
|
381 |
||
1732 | 382 |
/* |
383 |
* If we know we've searched the whole map (*cursor == 0), give up. |
|
384 |
* Otherwise, reset the cursor to the beginning and try again. |
|
385 |
*/ |
|
386 |
if (*cursor == 0) |
|
387 |
return (-1ULL); |
|
388 |
||
389 |
*cursor = 0; |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
390 |
return (metaslab_block_picker(t, cursor, size, align)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
391 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
392 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
393 |
static void |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
394 |
metaslab_pp_load(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
395 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
396 |
space_seg_t *ss; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
397 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
398 |
ASSERT(sm->sm_ppd == NULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
399 |
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
400 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
401 |
sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
402 |
avl_create(sm->sm_pp_root, metaslab_segsize_compare, |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
403 |
sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
404 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
405 |
for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
406 |
avl_add(sm->sm_pp_root, ss); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
407 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
408 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
409 |
static void |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
410 |
metaslab_pp_unload(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
411 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
412 |
void *cookie = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
413 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
414 |
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
415 |
sm->sm_ppd = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
416 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
417 |
while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
418 |
/* tear down the tree */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
419 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
420 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
421 |
avl_destroy(sm->sm_pp_root); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
422 |
kmem_free(sm->sm_pp_root, sizeof (avl_tree_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
423 |
sm->sm_pp_root = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
424 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
425 |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
426 |
/* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
427 |
static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
428 |
metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
429 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
430 |
/* No need to update cursor */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
431 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
432 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
433 |
/* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
434 |
static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
435 |
metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
436 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
437 |
/* No need to update cursor */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
438 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
439 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
440 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
441 |
* Return the maximum contiguous segment within the metaslab. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
442 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
443 |
uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
444 |
metaslab_pp_maxsize(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
445 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
446 |
avl_tree_t *t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
447 |
space_seg_t *ss; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
448 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
449 |
if (t == NULL || (ss = avl_last(t)) == NULL) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
450 |
return (0ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
451 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
452 |
return (ss->ss_end - ss->ss_start); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
453 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
454 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
455 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
456 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
457 |
* The first-fit block allocator |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
458 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
459 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
460 |
static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
461 |
metaslab_ff_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
462 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
463 |
avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
464 |
uint64_t align = size & -size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
465 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
466 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
467 |
return (metaslab_block_picker(t, cursor, size, align)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
468 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
469 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
470 |
/* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
471 |
boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
472 |
metaslab_ff_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
473 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
474 |
return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
475 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
476 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
477 |
static space_map_ops_t metaslab_ff_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
478 |
metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
479 |
metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
480 |
metaslab_ff_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
481 |
metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
482 |
metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
483 |
metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
484 |
metaslab_ff_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
485 |
}; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
486 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
487 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
488 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
489 |
* Dynamic block allocator - |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
490 |
* Uses the first fit allocation scheme until space gets low and then |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
491 |
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
492 |
* and metaslab_df_free_pct to determine when to switch the allocation scheme. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
493 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
494 |
*/ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
495 |
static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
496 |
metaslab_df_alloc(space_map_t *sm, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
497 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
498 |
avl_tree_t *t = &sm->sm_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
499 |
uint64_t align = size & -size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
500 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
501 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
502 |
int free_pct = sm->sm_space * 100 / sm->sm_size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
503 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
504 |
ASSERT(MUTEX_HELD(sm->sm_lock)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
505 |
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
506 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
507 |
if (max_size < size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
508 |
return (-1ULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
509 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
510 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
511 |
* If we're running low on space switch to using the size |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
512 |
* sorted AVL tree (best-fit). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
513 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
514 |
if (max_size < metaslab_df_alloc_threshold || |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
515 |
free_pct < metaslab_df_free_pct) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
516 |
t = sm->sm_pp_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
517 |
*cursor = 0; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
518 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
519 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
520 |
return (metaslab_block_picker(t, cursor, size, 1ULL)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
521 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
522 |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
523 |
static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
524 |
metaslab_df_fragmented(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
525 |
{ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
526 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
527 |
int free_pct = sm->sm_space * 100 / sm->sm_size; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
528 |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
529 |
if (max_size >= metaslab_df_alloc_threshold && |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
530 |
free_pct >= metaslab_df_free_pct) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
531 |
return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
532 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
533 |
return (B_TRUE); |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
534 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
535 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
536 |
static space_map_ops_t metaslab_df_ops = { |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
537 |
metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
538 |
metaslab_pp_unload, |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
539 |
metaslab_df_alloc, |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
540 |
metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
541 |
metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
542 |
metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
543 |
metaslab_df_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
544 |
}; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
545 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
546 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
547 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
548 |
* Other experimental allocators |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
549 |
* ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
550 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
551 |
static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
552 |
metaslab_cdf_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
553 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
554 |
avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
555 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
556 |
uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
557 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
558 |
uint64_t rsize = size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
559 |
uint64_t offset = 0; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
560 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
561 |
ASSERT(MUTEX_HELD(sm->sm_lock)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
562 |
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
563 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
564 |
if (max_size < size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
565 |
return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
566 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
567 |
ASSERT3U(*extent_end, >=, *cursor); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
568 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
569 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
570 |
* If we're running low on space switch to using the size |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
571 |
* sorted AVL tree (best-fit). |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
572 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
573 |
if ((*cursor + size) > *extent_end) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
574 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
575 |
t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
576 |
*cursor = *extent_end = 0; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
577 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
578 |
if (max_size > 2 * SPA_MAXBLOCKSIZE) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
579 |
rsize = MIN(metaslab_min_alloc_size, max_size); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
580 |
offset = metaslab_block_picker(t, extent_end, rsize, 1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
581 |
if (offset != -1) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
582 |
*cursor = offset + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
583 |
} else { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
584 |
offset = metaslab_block_picker(t, cursor, rsize, 1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
585 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
586 |
ASSERT3U(*cursor, <=, *extent_end); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
587 |
return (offset); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
588 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
589 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
590 |
static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
591 |
metaslab_cdf_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
592 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
593 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
594 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
595 |
if (max_size > (metaslab_min_alloc_size * 10)) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
596 |
return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
597 |
return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
598 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
599 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
600 |
static space_map_ops_t metaslab_cdf_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
601 |
metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
602 |
metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
603 |
metaslab_cdf_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
604 |
metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
605 |
metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
606 |
metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
607 |
metaslab_cdf_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
608 |
}; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
609 |
|
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
610 |
uint64_t metaslab_ndf_clump_shift = 4; |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
611 |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
612 |
static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
613 |
metaslab_ndf_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
614 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
615 |
avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
616 |
avl_index_t where; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
617 |
space_seg_t *ss, ssearch; |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
618 |
uint64_t hbit = highbit(size); |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
619 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
620 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
621 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
622 |
ASSERT(MUTEX_HELD(sm->sm_lock)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
623 |
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
624 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
625 |
if (max_size < size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
626 |
return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
627 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
628 |
ssearch.ss_start = *cursor; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
629 |
ssearch.ss_end = *cursor + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
630 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
631 |
ss = avl_find(t, &ssearch, &where); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
632 |
if (ss == NULL || (ss->ss_start + size > ss->ss_end)) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
633 |
t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
634 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
635 |
ssearch.ss_start = 0; |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
636 |
ssearch.ss_end = MIN(max_size, |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
637 |
1ULL << (hbit + metaslab_ndf_clump_shift)); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
638 |
ss = avl_find(t, &ssearch, &where); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
639 |
if (ss == NULL) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
640 |
ss = avl_nearest(t, where, AVL_AFTER); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
641 |
ASSERT(ss != NULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
642 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
643 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
644 |
if (ss != NULL) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
645 |
if (ss->ss_start + size <= ss->ss_end) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
646 |
*cursor = ss->ss_start + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
647 |
return (ss->ss_start); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
648 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
649 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
650 |
return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
651 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
652 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
653 |
static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
654 |
metaslab_ndf_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
655 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
656 |
uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
657 |
|
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
658 |
if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift)) |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
659 |
return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
660 |
return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
661 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
662 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
663 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
664 |
static space_map_ops_t metaslab_ndf_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
665 |
metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
666 |
metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
667 |
metaslab_ndf_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
668 |
metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
669 |
metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
670 |
metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
671 |
metaslab_ndf_fragmented |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
672 |
}; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
673 |
|
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
674 |
space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
675 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
676 |
/* |
1732 | 677 |
* ========================================================================== |
678 |
* Metaslabs |
|
679 |
* ========================================================================== |
|
680 |
*/ |
|
681 |
metaslab_t * |
|
682 |
metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, |
|
683 |
uint64_t start, uint64_t size, uint64_t txg) |
|
684 |
{ |
|
685 |
vdev_t *vd = mg->mg_vd; |
|
686 |
metaslab_t *msp; |
|
687 |
||
688 |
msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP); |
|
2856 | 689 |
mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL); |
1732 | 690 |
|
691 |
msp->ms_smo_syncing = *smo; |
|
692 |
||
693 |
/* |
|
694 |
* We create the main space map here, but we don't create the |
|
695 |
* allocmaps and freemaps until metaslab_sync_done(). This serves |
|
696 |
* two purposes: it allows metaslab_sync_done() to detect the |
|
697 |
* addition of new space; and for debugging, it ensures that we'd |
|
698 |
* data fault on any attempt to use this metaslab before it's ready. |
|
699 |
*/ |
|
700 |
space_map_create(&msp->ms_map, start, size, |
|
701 |
vd->vdev_ashift, &msp->ms_lock); |
|
702 |
||
703 |
metaslab_group_add(mg, msp); |
|
704 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
705 |
if (metaslab_debug && smo->smo_object != 0) { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
706 |
mutex_enter(&msp->ms_lock); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
707 |
VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
708 |
SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
709 |
mutex_exit(&msp->ms_lock); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
710 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
711 |
|
1732 | 712 |
/* |
713 |
* If we're opening an existing pool (txg == 0) or creating |
|
714 |
* a new one (txg == TXG_INITIAL), all space is available now. |
|
715 |
* If we're adding space to an existing pool, the new space |
|
716 |
* does not become available until after this txg has synced. |
|
717 |
*/ |
|
718 |
if (txg <= TXG_INITIAL) |
|
719 |
metaslab_sync_done(msp, 0); |
|
720 |
||
721 |
if (txg != 0) { |
|
722 |
vdev_dirty(vd, 0, NULL, txg); |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
723 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
789 | 724 |
} |
725 |
||
1732 | 726 |
return (msp); |
727 |
} |
|
728 |
||
729 |
void |
|
730 |
metaslab_fini(metaslab_t *msp) |
|
731 |
{ |
|
732 |
metaslab_group_t *mg = msp->ms_group; |
|
733 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
734 |
vdev_space_update(mg->mg_vd, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
735 |
-msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); |
1732 | 736 |
|
737 |
metaslab_group_remove(mg, msp); |
|
738 |
||
739 |
mutex_enter(&msp->ms_lock); |
|
740 |
||
741 |
space_map_unload(&msp->ms_map); |
|
742 |
space_map_destroy(&msp->ms_map); |
|
743 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
744 |
for (int t = 0; t < TXG_SIZE; t++) { |
1732 | 745 |
space_map_destroy(&msp->ms_allocmap[t]); |
746 |
space_map_destroy(&msp->ms_freemap[t]); |
|
747 |
} |
|
748 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
749 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
750 |
space_map_destroy(&msp->ms_defermap[t]); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
751 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
752 |
ASSERT3S(msp->ms_deferspace, ==, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
753 |
|
1732 | 754 |
mutex_exit(&msp->ms_lock); |
2856 | 755 |
mutex_destroy(&msp->ms_lock); |
1732 | 756 |
|
757 |
kmem_free(msp, sizeof (metaslab_t)); |
|
789 | 758 |
} |
759 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
760 |
/*
 * Flag bits occupying the two most significant bits (63 and 62) of a
 * 64-bit value, plus a mask covering both of them.
 * NOTE(review): presumably these are OR-ed into a metaslab's weight to
 * mark it active as a primary or secondary allocation target; confirm
 * against the users of these macros.
 */
#define	METASLAB_WEIGHT_PRIMARY		(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
#define	METASLAB_ACTIVE_MASK		\
	(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
1732 | 764 |
|
789 | 765 |
static uint64_t |
1732 | 766 |
metaslab_weight(metaslab_t *msp) |
767 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
768 |
metaslab_group_t *mg = msp->ms_group; |
1732 | 769 |
space_map_t *sm = &msp->ms_map; |
770 |
space_map_obj_t *smo = &msp->ms_smo; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
771 |
vdev_t *vd = mg->mg_vd; |
1732 | 772 |
uint64_t weight, space; |
773 |
||
774 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
|
775 |
||
776 |
/* |
|
777 |
* The baseline weight is the metaslab's free space. |
|
778 |
*/ |
|
779 |
space = sm->sm_size - smo->smo_alloc; |
|
780 |
weight = space; |
|
781 |
||
782 |
/* |
|
783 |
* Modern disks have uniform bit density and constant angular velocity. |
|
784 |
* Therefore, the outer recording zones are faster (higher bandwidth) |
|
785 |
* than the inner zones by the ratio of outer to inner track diameter, |
|
786 |
* which is typically around 2:1. We account for this by assigning |
|
787 |
* higher weight to lower metaslabs (multiplier ranging from 2x to 1x). |
|
788 |
* In effect, this means that we'll select the metaslab with the most |
|
789 |
* free bandwidth rather than simply the one with the most free space. |
|
790 |
*/ |
|
791 |
weight = 2 * weight - |
|
792 |
((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count; |
|
793 |
ASSERT(weight >= space && weight <= 2 * space); |
|
794 |
||
795 |
/* |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
796 |
* For locality, assign higher weight to metaslabs which have |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
797 |
* a lower offset than what we've already activated. |
1732 | 798 |
*/ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
799 |
if (sm->sm_start <= mg->mg_bonus_area) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
800 |
weight *= (metaslab_smo_bonus_pct / 100); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
801 |
ASSERT(weight >= space && |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
802 |
weight <= 2 * (metaslab_smo_bonus_pct / 100) * space); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
803 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
804 |
if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
805 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
806 |
* If this metaslab is one we're actively using, adjust its |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
807 |
* weight to make it preferable to any inactive metaslab so |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
808 |
* we'll polish it off. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
809 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
810 |
weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
811 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
812 |
return (weight); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
813 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
814 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
815 |
static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
816 |
metaslab_prefetch(metaslab_group_t *mg) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
817 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
818 |
spa_t *spa = mg->mg_vd->vdev_spa; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
819 |
metaslab_t *msp; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
820 |
avl_tree_t *t = &mg->mg_metaslab_tree; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
821 |
int m; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
822 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
823 |
mutex_enter(&mg->mg_lock); |
1732 | 824 |
|
825 |
/* |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
826 |
* Prefetch the next potential metaslabs |
1732 | 827 |
*/ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
828 |
for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
829 |
space_map_t *sm = &msp->ms_map; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
830 |
space_map_obj_t *smo = &msp->ms_smo; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
831 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
832 |
/* If we have reached our prefetch limit then we're done */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
833 |
if (m >= metaslab_prefetch_limit) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
834 |
break; |
1732 | 835 |
|
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
836 |
if (!sm->sm_loaded && smo->smo_object != 0) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
837 |
mutex_exit(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
838 |
dmu_prefetch(spa_meta_objset(spa), smo->smo_object, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
839 |
0ULL, smo->smo_objsize); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
840 |
mutex_enter(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
841 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
842 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
843 |
mutex_exit(&mg->mg_lock); |
1732 | 844 |
} |
845 |
||
846 |
static int |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
847 |
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) |
789 | 848 |
{ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
849 |
metaslab_group_t *mg = msp->ms_group; |
789 | 850 |
space_map_t *sm = &msp->ms_map; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
851 |
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; |
789 | 852 |
|
853 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
|
1732 | 854 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
855 |
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
856 |
space_map_load_wait(sm); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
857 |
if (!sm->sm_loaded) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
858 |
int error = space_map_load(sm, sm_ops, SM_FREE, |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
859 |
&msp->ms_smo, |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
860 |
spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
861 |
if (error) { |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
862 |
metaslab_group_sort(msp->ms_group, msp, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
863 |
return (error); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
864 |
} |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
865 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
866 |
space_map_walk(&msp->ms_defermap[t], |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
867 |
space_map_claim, sm); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
868 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
869 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
870 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
871 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
872 |
* Track the bonus area as we activate new metaslabs. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
873 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
874 |
if (sm->sm_start > mg->mg_bonus_area) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
875 |
mutex_enter(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
876 |
mg->mg_bonus_area = sm->sm_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
877 |
mutex_exit(&mg->mg_lock); |
1732 | 878 |
} |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
879 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
880 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
881 |
* If we were able to load the map then make sure |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
882 |
* that this map is still able to satisfy our request. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
883 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
884 |
if (msp->ms_weight < size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
885 |
return (ENOSPC); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
886 |
|
1732 | 887 |
metaslab_group_sort(msp->ms_group, msp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
888 |
msp->ms_weight | activation_weight); |
1732 | 889 |
} |
890 |
ASSERT(sm->sm_loaded); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
891 |
ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); |
1732 | 892 |
|
893 |
return (0); |
|
894 |
} |
|
895 |
||
896 |
static void |
|
897 |
metaslab_passivate(metaslab_t *msp, uint64_t size) |
|
898 |
{ |
|
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
899 |
/* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
900 |
* If size < SPA_MINBLOCKSIZE, then we will not allocate from |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
901 |
* this metaslab again. In that case, it had better be empty, |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
902 |
* or we would be leaving space on the table. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
903 |
*/ |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
904 |
ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
905 |
metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size)); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
906 |
ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); |
1732 | 907 |
} |
908 |
||
909 |
/* |
|
910 |
* Write a metaslab to disk in the context of the specified transaction group. |
|
911 |
*/ |
|
912 |
void |
|
913 |
metaslab_sync(metaslab_t *msp, uint64_t txg) |
|
914 |
{ |
|
915 |
vdev_t *vd = msp->ms_group->mg_vd; |
|
916 |
spa_t *spa = vd->vdev_spa; |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
917 |
objset_t *mos = spa_meta_objset(spa); |
1732 | 918 |
space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK]; |
919 |
space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK]; |
|
920 |
space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
|
921 |
space_map_t *sm = &msp->ms_map; |
|
922 |
space_map_obj_t *smo = &msp->ms_smo_syncing; |
|
923 |
dmu_buf_t *db; |
|
924 |
dmu_tx_t *tx; |
|
925 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
926 |
ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
927 |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
928 |
if (allocmap->sm_space == 0 && freemap->sm_space == 0) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
929 |
return; |
1732 | 930 |
|
931 |
/* |
|
932 |
* The only state that can actually be changing concurrently with |
|
933 |
* metaslab_sync() is the metaslab's ms_map. No other thread can |
|
934 |
* be modifying this txg's allocmap, freemap, freed_map, or smo. |
|
935 |
* Therefore, we only hold ms_lock to satify space_map ASSERTs. |
|
936 |
* We drop it whenever we call into the DMU, because the DMU |
|
937 |
* can call down to us (e.g. via zio_free()) at any time. |
|
938 |
*/ |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
939 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
940 |
tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); |
1732 | 941 |
|
942 |
if (smo->smo_object == 0) { |
|
943 |
ASSERT(smo->smo_objsize == 0); |
|
944 |
ASSERT(smo->smo_alloc == 0); |
|
945 |
smo->smo_object = dmu_object_alloc(mos, |
|
946 |
DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, |
|
947 |
DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); |
|
948 |
ASSERT(smo->smo_object != 0); |
|
949 |
dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) * |
|
950 |
(sm->sm_start >> vd->vdev_ms_shift), |
|
951 |
sizeof (uint64_t), &smo->smo_object, tx); |
|
952 |
} |
|
953 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
954 |
mutex_enter(&msp->ms_lock); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
955 |
|
1732 | 956 |
space_map_walk(freemap, space_map_add, freed_map); |
957 |
||
958 |
if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >= |
|
959 |
2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) { |
|
960 |
/* |
|
961 |
* The in-core space map representation is twice as compact |
|
962 |
* as the on-disk one, so it's time to condense the latter |
|
963 |
* by generating a pure allocmap from first principles. |
|
964 |
* |
|
965 |
* This metaslab is 100% allocated, |
|
966 |
* minus the content of the in-core map (sm), |
|
967 |
* minus what's been freed this txg (freed_map), |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
968 |
* minus deferred frees (ms_defermap[]), |
1732 | 969 |
* minus allocations from txgs in the future |
970 |
* (because they haven't been committed yet). |
|
971 |
*/ |
|
972 |
space_map_vacate(allocmap, NULL, NULL); |
|
973 |
space_map_vacate(freemap, NULL, NULL); |
|
974 |
||
975 |
space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size); |
|
789 | 976 |
|
1732 | 977 |
space_map_walk(sm, space_map_remove, allocmap); |
978 |
space_map_walk(freed_map, space_map_remove, allocmap); |
|
979 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
980 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
981 |
space_map_walk(&msp->ms_defermap[t], |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
982 |
space_map_remove, allocmap); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
983 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
984 |
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
1732 | 985 |
space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK], |
986 |
space_map_remove, allocmap); |
|
987 |
||
988 |
mutex_exit(&msp->ms_lock); |
|
989 |
space_map_truncate(smo, mos, tx); |
|
990 |
mutex_enter(&msp->ms_lock); |
|
789 | 991 |
} |
1732 | 992 |
|
993 |
space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); |
|
994 |
space_map_sync(freemap, SM_FREE, smo, mos, tx); |
|
995 |
||
996 |
mutex_exit(&msp->ms_lock); |
|
997 |
||
998 |
VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); |
|
999 |
dmu_buf_will_dirty(db, tx); |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
1000 |
ASSERT3U(db->db_size, >=, sizeof (*smo)); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
1001 |
bcopy(smo, db->db_data, sizeof (*smo)); |
1732 | 1002 |
dmu_buf_rele(db, FTAG); |
1003 |
||
1004 |
dmu_tx_commit(tx); |
|
1005 |
} |
|
1006 |
||
1007 |
/* |
|
1008 |
* Called after a transaction group has completely synced to mark |
|
1009 |
* all of the metaslab's free space as usable. |
|
1010 |
*/ |
|
1011 |
void |
|
1012 |
metaslab_sync_done(metaslab_t *msp, uint64_t txg) |
|
1013 |
{ |
|
1014 |
space_map_obj_t *smo = &msp->ms_smo; |
|
1015 |
space_map_obj_t *smosync = &msp->ms_smo_syncing; |
|
1016 |
space_map_t *sm = &msp->ms_map; |
|
1017 |
space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1018 |
space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; |
1732 | 1019 |
metaslab_group_t *mg = msp->ms_group; |
1020 |
vdev_t *vd = mg->mg_vd; |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1021 |
int64_t alloc_delta, defer_delta; |
1732 | 1022 |
|
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1023 |
ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1024 |
|
1732 | 1025 |
mutex_enter(&msp->ms_lock); |
1026 |
||
1027 |
/* |
|
1028 |
* If this metaslab is just becoming available, initialize its |
|
1029 |
* allocmaps and freemaps and add its capacity to the vdev. |
|
1030 |
*/ |
|
1031 |
if (freed_map->sm_size == 0) { |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1032 |
for (int t = 0; t < TXG_SIZE; t++) { |
1732 | 1033 |
space_map_create(&msp->ms_allocmap[t], sm->sm_start, |
1034 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
|
1035 |
space_map_create(&msp->ms_freemap[t], sm->sm_start, |
|
1036 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
|
1037 |
} |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1038 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1039 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1040 |
space_map_create(&msp->ms_defermap[t], sm->sm_start, |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1041 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1042 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1043 |
vdev_space_update(vd, 0, 0, sm->sm_size); |
1732 | 1044 |
} |
1045 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1046 |
alloc_delta = smosync->smo_alloc - smo->smo_alloc; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1047 |
defer_delta = freed_map->sm_space - defer_map->sm_space; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1048 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1049 |
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0); |
1732 | 1050 |
|
1051 |
ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0); |
|
1052 |
ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0); |
|
1053 |
||
1054 |
/* |
|
1055 |
* If there's a space_map_load() in progress, wait for it to complete |
|
1056 |
* so that we have a consistent view of the in-core space map. |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1057 |
* Then, add defer_map (oldest deferred frees) to this map and |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1058 |
* transfer freed_map (this txg's frees) to defer_map. |
1732 | 1059 |
*/ |
1060 |
space_map_load_wait(sm); |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1061 |
space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1062 |
space_map_vacate(freed_map, space_map_add, defer_map); |
1732 | 1063 |
|
1064 |
*smo = *smosync; |
|
1065 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1066 |
msp->ms_deferspace += defer_delta; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1067 |
ASSERT3S(msp->ms_deferspace, >=, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1068 |
ASSERT3S(msp->ms_deferspace, <=, sm->sm_size); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1069 |
if (msp->ms_deferspace != 0) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1070 |
/* |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1071 |
* Keep syncing this metaslab until all deferred frees |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1072 |
* are back in circulation. |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1073 |
*/ |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1074 |
vdev_dirty(vd, VDD_METASLAB, msp, txg + 1); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1075 |
} |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1076 |
|
1732 | 1077 |
/* |
1078 |
* If the map is loaded but no longer active, evict it as soon as all |
|
1079 |
* future allocations have synced. (If we unloaded it now and then |
|
1080 |
* loaded a moment later, the map wouldn't reflect those allocations.) |
|
1081 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1082 |
if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
1732 | 1083 |
int evictable = 1; |
1084 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1085 |
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
1732 | 1086 |
if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space) |
1087 |
evictable = 0; |
|
1088 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1089 |
if (evictable && !metaslab_debug) |
1732 | 1090 |
space_map_unload(sm); |
1091 |
} |
|
1092 |
||
1093 |
metaslab_group_sort(mg, msp, metaslab_weight(msp)); |
|
1094 |
||
1095 |
mutex_exit(&msp->ms_lock); |
|
789 | 1096 |
} |
1097 |
||
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1098 |
void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1099 |
metaslab_sync_reassess(metaslab_group_t *mg) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1100 |
{ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1101 |
vdev_t *vd = mg->mg_vd; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1102 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1103 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1104 |
* Re-evaluate all metaslabs which have lower offsets than the |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1105 |
* bonus area. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1106 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1107 |
for (int m = 0; m < vd->vdev_ms_count; m++) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1108 |
metaslab_t *msp = vd->vdev_ms[m]; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1109 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1110 |
if (msp->ms_map.sm_start > mg->mg_bonus_area) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1111 |
break; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1112 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1113 |
mutex_enter(&msp->ms_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1114 |
metaslab_group_sort(mg, msp, metaslab_weight(msp)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1115 |
mutex_exit(&msp->ms_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1116 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1117 |
|
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1118 |
/* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1119 |
* Prefetch the next potential metaslabs |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1120 |
*/ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1121 |
metaslab_prefetch(mg); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1122 |
} |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1123 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1124 |
static uint64_t |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1125 |
metaslab_distance(metaslab_t *msp, dva_t *dva) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1126 |
{ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1127 |
uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1128 |
uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1129 |
uint64_t start = msp->ms_map.sm_start >> ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1130 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1131 |
if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1132 |
return (1ULL << 63); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1133 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1134 |
if (offset < start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1135 |
return ((start - offset) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1136 |
if (offset > start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1137 |
return ((offset - start) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1138 |
return (0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1139 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1140 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1141 |
static uint64_t |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1142 |
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1143 |
uint64_t min_distance, dva_t *dva, int d) |
789 | 1144 |
{ |
1732 | 1145 |
metaslab_t *msp = NULL; |
1146 |
uint64_t offset = -1ULL; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1147 |
avl_tree_t *t = &mg->mg_metaslab_tree; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1148 |
uint64_t activation_weight; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1149 |
uint64_t target_distance; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1150 |
int i; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1151 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1152 |
activation_weight = METASLAB_WEIGHT_PRIMARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1153 |
for (i = 0; i < d; i++) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1154 |
if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1155 |
activation_weight = METASLAB_WEIGHT_SECONDARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1156 |
break; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1157 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1158 |
} |
789 | 1159 |
|
1732 | 1160 |
for (;;) { |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1161 |
boolean_t was_active; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1162 |
|
1732 | 1163 |
mutex_enter(&mg->mg_lock); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1164 |
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1165 |
if (msp->ms_weight < size) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1166 |
mutex_exit(&mg->mg_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1167 |
return (-1ULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1168 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1169 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1170 |
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1171 |
if (activation_weight == METASLAB_WEIGHT_PRIMARY) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1172 |
break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1173 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1174 |
target_distance = min_distance + |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1175 |
(msp->ms_smo.smo_alloc ? 0 : min_distance >> 1); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1176 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1177 |
for (i = 0; i < d; i++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1178 |
if (metaslab_distance(msp, &dva[i]) < |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1179 |
target_distance) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1180 |
break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1181 |
if (i == d) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1182 |
break; |
1732 | 1183 |
} |
1184 |
mutex_exit(&mg->mg_lock); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1185 |
if (msp == NULL) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1186 |
return (-1ULL); |
789 | 1187 |
|
1188 |
mutex_enter(&msp->ms_lock); |
|
1732 | 1189 |
|
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1190 |
/* |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1191 |
* Ensure that the metaslab we have selected is still |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1192 |
* capable of handling our request. It's possible that |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1193 |
* another thread may have changed the weight while we |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1194 |
* were blocked on the metaslab lock. |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1195 |
*/ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1196 |
if (msp->ms_weight < size || (was_active && |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1197 |
!(msp->ms_weight & METASLAB_ACTIVE_MASK) && |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1198 |
activation_weight == METASLAB_WEIGHT_PRIMARY)) { |
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1199 |
mutex_exit(&msp->ms_lock); |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1200 |
continue; |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1201 |
} |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1202 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1203 |
if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) && |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1204 |
activation_weight == METASLAB_WEIGHT_PRIMARY) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1205 |
metaslab_passivate(msp, |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
1206 |
msp->ms_weight & ~METASLAB_ACTIVE_MASK); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1207 |
mutex_exit(&msp->ms_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1208 |
continue; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1209 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1210 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1211 |
if (metaslab_activate(msp, activation_weight, size) != 0) { |
789 | 1212 |
mutex_exit(&msp->ms_lock); |
1213 |
continue; |
|
1214 |
} |
|
1732 | 1215 |
|
1216 |
if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) |
|
1217 |
break; |
|
1218 |
||
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1219 |
metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); |
1732 | 1220 |
|
789 | 1221 |
mutex_exit(&msp->ms_lock); |
1222 |
} |
|
1223 |
||
1732 | 1224 |
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) |
1225 |
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); |
|
1226 |
||
1227 |
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); |
|
1228 |
||
1229 |
mutex_exit(&msp->ms_lock); |
|
1230 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1231 |
return (offset); |
789 | 1232 |
} |
1233 |
||
1234 |
/* |
|
1235 |
* Allocate a block for the specified i/o. |
|
1236 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1237 |
static int |
4527 | 1238 |
metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1239 |
dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) |
789 | 1240 |
{ |
1241 |
metaslab_group_t *mg, *rotor; |
|
1242 |
vdev_t *vd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1243 |
int dshift = 3; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1244 |
int all_zero; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1245 |
int zio_lock = B_FALSE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1246 |
boolean_t allocatable; |
789 | 1247 |
uint64_t offset = -1ULL; |
1248 |
uint64_t asize; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1249 |
uint64_t distance; |
789 | 1250 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1251 |
ASSERT(!DVA_IS_VALID(&dva[d])); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1252 |
|
789 | 1253 |
/* |
5530 | 1254 |
* For testing, make some blocks above a certain size be gang blocks. |
1255 |
*/ |
|
11066
cebb50cbe4f9
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents:
11026
diff
changeset
|
1256 |
if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) |
5530 | 1257 |
return (ENOSPC); |
1258 |
||
1259 |
/* |
|
789 | 1260 |
* Start at the rotor and loop through all mgs until we find something. |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1261 |
* Note that there's no locking on mc_rotor or mc_aliquot because |
789 | 1262 |
* nothing actually breaks if we miss a few updates -- we just won't |
1263 |
* allocate quite as evenly. It all balances out over time. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1264 |
* |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1265 |
* If we are doing ditto or log blocks, try to spread them across |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1266 |
* consecutive vdevs. If we're forced to reuse a vdev before we've |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1267 |
* allocated all of our ditto blocks, then try and spread them out on |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1268 |
* that vdev as much as possible. If it turns out to not be possible, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1269 |
* gradually lower our standards until anything becomes acceptable. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1270 |
* Also, allocating on consecutive vdevs (as opposed to random vdevs) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1271 |
* gives us hope of containing our fault domains to something we're |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1272 |
* able to reason about. Otherwise, any two top-level vdev failures |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1273 |
* will guarantee the loss of data. With consecutive allocation, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1274 |
* only two adjacent top-level vdev failures will result in data loss. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1275 |
* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1276 |
* If we are doing gang blocks (hintdva is non-NULL), try to keep |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1277 |
* ourselves on the same vdev as our gang block header. That |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1278 |
* way, we can hope for locality in vdev_cache, plus it makes our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1279 |
* fault domains something tractable. |
789 | 1280 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1281 |
if (hintdva) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1282 |
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d])); |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1283 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1284 |
/* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1285 |
* It's possible the vdev we're using as the hint no |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1286 |
* longer exists (i.e. removed). Consult the rotor when |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1287 |
* all else fails. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1288 |
*/ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1289 |
if (vd != NULL) { |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1290 |
mg = vd->vdev_mg; |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1291 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1292 |
if (flags & METASLAB_HINTBP_AVOID && |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1293 |
mg->mg_next != NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1294 |
mg = mg->mg_next; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1295 |
} else { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1296 |
mg = mc->mc_rotor; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1297 |
} |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1298 |
} else if (d != 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1299 |
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1300 |
mg = vd->vdev_mg->mg_next; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1301 |
} else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1302 |
mg = mc->mc_rotor; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1303 |
} |
4527 | 1304 |
|
1305 |
/* |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1306 |
* If the hint put us into the wrong metaslab class, or into a |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1307 |
* metaslab group that has been passivated, just follow the rotor. |
4527 | 1308 |
*/ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1309 |
if (mg->mg_class != mc || mg->mg_activation_count <= 0) |
4527 | 1310 |
mg = mc->mc_rotor; |
1311 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1312 |
rotor = mg; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1313 |
top: |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1314 |
all_zero = B_TRUE; |
789 | 1315 |
do { |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1316 |
ASSERT(mg->mg_activation_count == 1); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1317 |
|
789 | 1318 |
vd = mg->mg_vd; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1319 |
|
5329 | 1320 |
/* |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1321 |
* Don't allocate from faulted devices. |
5329 | 1322 |
*/ |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1323 |
if (zio_lock) { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1324 |
spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1325 |
allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1326 |
spa_config_exit(spa, SCL_ZIO, FTAG); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1327 |
} else { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1328 |
allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1329 |
} |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1330 |
if (!allocatable) |
5329 | 1331 |
goto next; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1332 |
|
5329 | 1333 |
/* |
1334 |
* Avoid writing single-copy data to a failing vdev |
|
1335 |
*/ |
|
1336 |
if ((vd->vdev_stat.vs_write_errors > 0 || |
|
1337 |
vd->vdev_state < VDEV_STATE_HEALTHY) && |
|
1338 |
d == 0 && dshift == 3) { |
|
1339 |
all_zero = B_FALSE; |
|
1340 |
goto next; |
|
1341 |
} |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1342 |
|
4527 | 1343 |
ASSERT(mg->mg_class == mc); |
1344 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1345 |
distance = vd->vdev_asize >> dshift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1346 |
if (distance <= (1ULL << vd->vdev_ms_shift)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1347 |
distance = 0; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1348 |
else |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1349 |
all_zero = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1350 |
|
789 | 1351 |
asize = vdev_psize_to_asize(vd, psize); |
1352 |
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); |
|
1353 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1354 |
offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1355 |
if (offset != -1ULL) { |
789 | 1356 |
/* |
1357 |
* If we've just selected this metaslab group, |
|
1358 |
* figure out whether the corresponding vdev is |
|
1359 |
* over- or under-used relative to the pool, |
|
1360 |
* and set an allocation bias to even it out. |
|
1361 |
*/ |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1362 |
if (mc->mc_aliquot == 0) { |
789 | 1363 |
vdev_stat_t *vs = &vd->vdev_stat; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1364 |
int64_t vu, cu; |
789 | 1365 |
|
1366 |
/* |
|
1367 |
* Determine percent used in units of 0..1024. |
|
1368 |
* (This is just to avoid floating point.) |
|
1369 |
*/ |
|
1370 |
vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1371 |
cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); |
789 | 1372 |
|
1373 |
/* |
|
1374 |
* Bias by at most +/- 25% of the aliquot. |
|
1375 |
*/ |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1376 |
mg->mg_bias = ((cu - vu) * |
789 | 1377 |
(int64_t)mg->mg_aliquot) / (1024 * 4); |
1378 |
} |
|
1379 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1380 |
if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= |
789 | 1381 |
mg->mg_aliquot + mg->mg_bias) { |
1382 |
mc->mc_rotor = mg->mg_next; |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1383 |
mc->mc_aliquot = 0; |
789 | 1384 |
} |
1385 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1386 |
DVA_SET_VDEV(&dva[d], vd->vdev_id); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1387 |
DVA_SET_OFFSET(&dva[d], offset); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1388 |
DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1389 |
DVA_SET_ASIZE(&dva[d], asize); |
789 | 1390 |
|
1391 |
return (0); |
|
1392 |
} |
|
5329 | 1393 |
next: |
789 | 1394 |
mc->mc_rotor = mg->mg_next; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1395 |
mc->mc_aliquot = 0; |
789 | 1396 |
} while ((mg = mg->mg_next) != rotor); |
1397 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1398 |
if (!all_zero) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1399 |
dshift++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1400 |
ASSERT(dshift < 64); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1401 |
goto top; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1402 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1403 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1404 |
if (!allocatable && !zio_lock) { |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1405 |
dshift = 3; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1406 |
zio_lock = B_TRUE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1407 |
goto top; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1408 |
} |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1409 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1410 |
bzero(&dva[d], sizeof (dva_t)); |
789 | 1411 |
|
1412 |
return (ENOSPC); |
|
1413 |
} |
|
1414 |
||
1415 |
/* |
|
1416 |
* Free the block represented by DVA in the context of the specified |
|
1417 |
* transaction group. |
|
1418 |
*/ |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1419 |
static void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1420 |
metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) |
789 | 1421 |
{ |
1422 |
uint64_t vdev = DVA_GET_VDEV(dva); |
|
1423 |
uint64_t offset = DVA_GET_OFFSET(dva); |
|
1424 |
uint64_t size = DVA_GET_ASIZE(dva); |
|
1425 |
vdev_t *vd; |
|
1426 |
metaslab_t *msp; |
|
1427 |
||
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1428 |
ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1429 |
|
789 | 1430 |
if (txg > spa_freeze_txg(spa)) |
1431 |
return; |
|
1432 |
||
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1433 |
if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1434 |
(offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1435 |
cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu", |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1436 |
(u_longlong_t)vdev, (u_longlong_t)offset); |
789 | 1437 |
ASSERT(0); |
1438 |
return; |
|
1439 |
} |
|
1440 |
||
1441 |
msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; |
|
1442 |
||
1443 |
if (DVA_GET_GANG(dva)) |
|
1444 |
size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); |
|
1445 |
||
1446 |
mutex_enter(&msp->ms_lock); |
|
1447 |
||
1732 | 1448 |
if (now) { |
1449 |
space_map_remove(&msp->ms_allocmap[txg & TXG_MASK], |
|
1450 |
offset, size); |
|
1451 |
space_map_free(&msp->ms_map, offset, size); |
|
1452 |
} else { |
|
1453 |
if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0) |
|
1454 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
|
1455 |
space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size); |
|
789 | 1456 |
} |
1457 |
||
1458 |
mutex_exit(&msp->ms_lock); |
|
1459 |
} |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1460 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1461 |
/* |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1462 |
* Intent log support: upon opening the pool after a crash, notify the SPA |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1463 |
* of blocks that the intent log has allocated for immediate write, but |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1464 |
* which are still considered free by the SPA because the last transaction |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1465 |
* group didn't commit yet. |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1466 |
*/ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1467 |
static int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1468 |
metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1469 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1470 |
uint64_t vdev = DVA_GET_VDEV(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1471 |
uint64_t offset = DVA_GET_OFFSET(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1472 |
uint64_t size = DVA_GET_ASIZE(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1473 |
vdev_t *vd; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1474 |
metaslab_t *msp; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1475 |
int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1476 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1477 |
ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1478 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1479 |
if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1480 |
(offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1481 |
return (ENXIO); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1482 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1483 |
msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1484 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1485 |
if (DVA_GET_GANG(dva)) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1486 |
size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1487 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1488 |
mutex_enter(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1489 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1490 |
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1491 |
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1492 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1493 |
if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1494 |
error = ENOENT; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1495 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1496 |
if (error || txg == 0) { /* txg == 0 indicates dry run */ |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1497 |
mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1498 |
return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1499 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1500 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1501 |
space_map_claim(&msp->ms_map, offset, size); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1502 |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1503 |
if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1504 |
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1505 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1506 |
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1507 |
} |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1508 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1509 |
mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1510 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1511 |
return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1512 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1513 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1514 |
int |
4527 | 1515 |
metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1516 |
int ndvas, uint64_t txg, blkptr_t *hintbp, int flags) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1517 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1518 |
dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1519 |
dva_t *hintdva = hintbp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1520 |
int error = 0; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1521 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1522 |
ASSERT(bp->blk_birth == 0); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1523 |
ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1524 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1525 |
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1526 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1527 |
if (mc->mc_rotor == NULL) { /* no vdevs in this class */ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1528 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
4527 | 1529 |
return (ENOSPC); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1530 |
} |
4527 | 1531 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1532 |
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1533 |
ASSERT(BP_GET_NDVAS(bp) == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1534 |
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1535 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1536 |
for (int d = 0; d < ndvas; d++) { |
4527 | 1537 |
error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1538 |
txg, flags); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1539 |
if (error) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1540 |
for (d--; d >= 0; d--) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1541 |
metaslab_free_dva(spa, &dva[d], txg, B_TRUE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1542 |
bzero(&dva[d], sizeof (dva_t)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1543 |
} |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1544 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1545 |
return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1546 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1547 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1548 |
ASSERT(error == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1549 |
ASSERT(BP_GET_NDVAS(bp) == ndvas); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1550 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1551 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1552 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1553 |
BP_SET_BIRTH(bp, txg, txg); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1554 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1555 |
return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1556 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1557 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1558 |
void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1559 |
metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1560 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1561 |
const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1562 |
int ndvas = BP_GET_NDVAS(bp); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1563 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1564 |
ASSERT(!BP_IS_HOLE(bp)); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1565 |
ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1566 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1567 |
spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1568 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1569 |
for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1570 |
metaslab_free_dva(spa, &dva[d], txg, now); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1571 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1572 |
spa_config_exit(spa, SCL_FREE, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1573 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1574 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1575 |
int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1576 |
metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1577 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1578 |
const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1579 |
int ndvas = BP_GET_NDVAS(bp); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1580 |
int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1581 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1582 |
ASSERT(!BP_IS_HOLE(bp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1583 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1584 |
if (txg != 0) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1585 |
/* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1586 |
* First do a dry run to make sure all DVAs are claimable, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1587 |
* so we don't have to unwind from partial failures below. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1588 |
*/ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1589 |
if ((error = metaslab_claim(spa, bp, 0)) != 0) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1590 |
return (error); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1591 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1592 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1593 |
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1594 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1595 |
for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1596 |
if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1597 |
break; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1598 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1599 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1600 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1601 |
ASSERT(error == 0 || txg == 0); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1602 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1603 |
return (error); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1604 |
} |