author | Tim Haley <Tim.Haley@Sun.COM> |
Mon, 09 Nov 2009 23:34:30 -0700 | |
changeset 11026 | e8e10df16a8f |
parent 10974 | 32d689ba6466 |
child 11066 | cebb50cbe4f9 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
22 |
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#include <sys/zfs_context.h> |
|
27 |
#include <sys/dmu.h> |
|
28 |
#include <sys/dmu_tx.h> |
|
29 |
#include <sys/space_map.h> |
|
30 |
#include <sys/metaslab_impl.h> |
|
31 |
#include <sys/vdev_impl.h> |
|
32 |
#include <sys/zio.h> |
|
33 |
||
2391 | 34 |
uint64_t metaslab_aliquot = 512ULL << 10; |
5530 | 35 |
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ |
2391 | 36 |
|
789 | 37 |
/* |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
38 |
* Metaslab debugging: when set, keeps all space maps in core to verify frees. |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
39 |
*/ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
40 |
static int metaslab_debug = 0; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
41 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
42 |
/* |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
43 |
* Minimum size which forces the dynamic allocator to change |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
44 |
* its allocation strategy. Once the space map cannot satisfy |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
45 |
* an allocation of this size then it switches to using a more |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
46 |
* aggressive strategy (i.e., search by size rather than offset). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
47 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
48 |
uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
49 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
50 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
51 |
* The minimum free space, in percent, which must be available |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
52 |
* in a space map to continue allocations in a first-fit fashion. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
53 |
* Once the space_map's free space drops below this level we dynamically |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
54 |
* switch to using best-fit allocations. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
55 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
56 |
int metaslab_df_free_pct = 30; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
57 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
58 |
/* |
789 | 59 |
* ========================================================================== |
60 |
* Metaslab classes |
|
61 |
* ========================================================================== |
|
62 |
*/ |
|
63 |
metaslab_class_t * |
|
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
64 |
metaslab_class_create(spa_t *spa, space_map_ops_t *ops) |
789 | 65 |
{ |
66 |
metaslab_class_t *mc; |
|
67 |
||
68 |
mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); |
|
69 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
70 |
mc->mc_spa = spa; |
789 | 71 |
mc->mc_rotor = NULL; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
72 |
mc->mc_ops = ops; |
789 | 73 |
|
74 |
return (mc); |
|
75 |
} |
|
76 |
||
77 |
void |
|
78 |
metaslab_class_destroy(metaslab_class_t *mc) |
|
79 |
{ |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
80 |
ASSERT(mc->mc_rotor == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
81 |
ASSERT(mc->mc_alloc == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
82 |
ASSERT(mc->mc_deferred == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
83 |
ASSERT(mc->mc_space == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
84 |
ASSERT(mc->mc_dspace == 0); |
789 | 85 |
|
86 |
kmem_free(mc, sizeof (metaslab_class_t)); |
|
87 |
} |
|
88 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
89 |
int |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
90 |
metaslab_class_validate(metaslab_class_t *mc) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
91 |
{ |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
92 |
metaslab_group_t *mg; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
93 |
vdev_t *vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
94 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
95 |
/* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
96 |
* Must hold one of the spa_config locks. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
97 |
*/ |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
98 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
99 |
spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
100 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
101 |
if ((mg = mc->mc_rotor) == NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
102 |
return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
103 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
104 |
do { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
105 |
vd = mg->mg_vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
106 |
ASSERT(vd->vdev_mg != NULL); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
107 |
ASSERT3P(vd->vdev_top, ==, vd); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
108 |
ASSERT3P(mg->mg_class, ==, mc); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
109 |
ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
110 |
} while ((mg = mg->mg_next) != mc->mc_rotor); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
111 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
112 |
return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
113 |
} |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
114 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
115 |
void |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
116 |
metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
117 |
int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
118 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
119 |
atomic_add_64(&mc->mc_alloc, alloc_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
120 |
atomic_add_64(&mc->mc_deferred, defer_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
121 |
atomic_add_64(&mc->mc_space, space_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
122 |
atomic_add_64(&mc->mc_dspace, dspace_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
123 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
124 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
125 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
126 |
metaslab_class_get_alloc(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
127 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
128 |
return (mc->mc_alloc); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
129 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
130 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
131 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
132 |
metaslab_class_get_deferred(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
133 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
134 |
return (mc->mc_deferred); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
135 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
136 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
137 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
138 |
metaslab_class_get_space(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
139 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
140 |
return (mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
141 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
142 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
143 |
uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
144 |
metaslab_class_get_dspace(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
145 |
{ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
146 |
return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
147 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
148 |
|
789 | 149 |
/* |
150 |
* ========================================================================== |
|
151 |
* Metaslab groups |
|
152 |
* ========================================================================== |
|
153 |
*/ |
|
154 |
static int |
|
155 |
metaslab_compare(const void *x1, const void *x2) |
|
156 |
{ |
|
157 |
const metaslab_t *m1 = x1; |
|
158 |
const metaslab_t *m2 = x2; |
|
159 |
||
160 |
if (m1->ms_weight < m2->ms_weight) |
|
161 |
return (1); |
|
162 |
if (m1->ms_weight > m2->ms_weight) |
|
163 |
return (-1); |
|
164 |
||
165 |
/* |
|
166 |
* If the weights are identical, use the offset to force uniqueness. |
|
167 |
*/ |
|
168 |
if (m1->ms_map.sm_start < m2->ms_map.sm_start) |
|
169 |
return (-1); |
|
170 |
if (m1->ms_map.sm_start > m2->ms_map.sm_start) |
|
171 |
return (1); |
|
172 |
||
173 |
ASSERT3P(m1, ==, m2); |
|
174 |
||
175 |
return (0); |
|
176 |
} |
|
177 |
||
178 |
metaslab_group_t * |
|
179 |
metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) |
|
180 |
{ |
|
181 |
metaslab_group_t *mg; |
|
182 |
||
183 |
mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); |
|
184 |
mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); |
|
185 |
avl_create(&mg->mg_metaslab_tree, metaslab_compare, |
|
186 |
sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); |
|
187 |
mg->mg_vd = vd; |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
188 |
mg->mg_class = mc; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
189 |
mg->mg_activation_count = 0; |
789 | 190 |
|
191 |
return (mg); |
|
192 |
} |
|
193 |
||
194 |
void |
|
195 |
metaslab_group_destroy(metaslab_group_t *mg) |
|
196 |
{ |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
197 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
198 |
ASSERT(mg->mg_next == NULL); |
11026
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
199 |
/* |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
200 |
* We may have gone below zero with the activation count |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
201 |
* either because we never activated in the first place or |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
202 |
* because we're done, and possibly removing the vdev. |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
203 |
*/ |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
204 |
ASSERT(mg->mg_activation_count <= 0); |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
205 |
|
789 | 206 |
avl_destroy(&mg->mg_metaslab_tree); |
207 |
mutex_destroy(&mg->mg_lock); |
|
208 |
kmem_free(mg, sizeof (metaslab_group_t)); |
|
209 |
} |
|
210 |
||
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
211 |
void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
212 |
metaslab_group_activate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
213 |
{ |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
214 |
metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
215 |
metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
216 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
217 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
218 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
219 |
ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
220 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
221 |
ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
222 |
ASSERT(mg->mg_activation_count <= 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
223 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
224 |
if (++mg->mg_activation_count <= 0) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
225 |
return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
226 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
227 |
mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
228 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
229 |
if ((mgprev = mc->mc_rotor) == NULL) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
230 |
mg->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
231 |
mg->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
232 |
} else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
233 |
mgnext = mgprev->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
234 |
mg->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
235 |
mg->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
236 |
mgprev->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
237 |
mgnext->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
238 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
239 |
mc->mc_rotor = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
240 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
241 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
242 |
void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
243 |
metaslab_group_passivate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
244 |
{ |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
245 |
metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
246 |
metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
247 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
248 |
ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
249 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
250 |
if (--mg->mg_activation_count != 0) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
251 |
ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
252 |
ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
253 |
ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
254 |
ASSERT(mg->mg_activation_count < 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
255 |
return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
256 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
257 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
258 |
mgprev = mg->mg_prev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
259 |
mgnext = mg->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
260 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
261 |
if (mg == mgnext) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
262 |
mc->mc_rotor = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
263 |
} else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
264 |
mc->mc_rotor = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
265 |
mgprev->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
266 |
mgnext->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
267 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
268 |
|
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
269 |
mg->mg_prev = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
270 |
mg->mg_next = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
271 |
} |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
272 |
|
1732 | 273 |
static void |
274 |
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) |
|
789 | 275 |
{ |
276 |
mutex_enter(&mg->mg_lock); |
|
277 |
ASSERT(msp->ms_group == NULL); |
|
278 |
msp->ms_group = mg; |
|
1732 | 279 |
msp->ms_weight = 0; |
789 | 280 |
avl_add(&mg->mg_metaslab_tree, msp); |
281 |
mutex_exit(&mg->mg_lock); |
|
282 |
} |
|
283 |
||
1732 | 284 |
static void |
789 | 285 |
metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) |
286 |
{ |
|
287 |
mutex_enter(&mg->mg_lock); |
|
288 |
ASSERT(msp->ms_group == mg); |
|
289 |
avl_remove(&mg->mg_metaslab_tree, msp); |
|
290 |
msp->ms_group = NULL; |
|
291 |
mutex_exit(&mg->mg_lock); |
|
292 |
} |
|
293 |
||
1732 | 294 |
/*
 * Give metaslab 'msp' a new weight and re-insert it into its group's tree.
 *
 * The caller must hold msp->ms_lock (asserted). With mg->mg_lock held the
 * metaslab is removed from mg->mg_metaslab_tree, ms_weight is set to
 * 'weight', and the metaslab is added back.
 * NOTE(review): presumably the tree is ordered by ms_weight, which is why
 * the node is removed before the field changes -- the comparator is not
 * visible here; confirm.
 */
static void |
789 | 295 |
metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) |
296 |
{ |
|
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
297 |
/* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
298 |
* Although in principle the weight can be any value, in |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
299 |
* practice we do not use values in the range [1, 510]. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
300 |
*/ |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
301 |
ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0); |
1732 | 302 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
303 |
||
789 | 304 |
mutex_enter(&mg->mg_lock); |
305 |
ASSERT(msp->ms_group == mg); |
|
306 |
avl_remove(&mg->mg_metaslab_tree, msp); |
|
307 |
msp->ms_weight = weight; |
|
308 |
avl_add(&mg->mg_metaslab_tree, msp); |
|
309 |
mutex_exit(&mg->mg_lock); |
|
310 |
} |
|
311 |
||
312 |
/* |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
313 |
* This is a helper function that can be used by the allocator to find |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
314 |
* a suitable block to allocate. This will search the specified AVL |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
315 |
* tree looking for a block that matches the specified criteria. |
789 | 316 |
*/ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
317 |
/*
 * t      - AVL tree of space_seg_t nodes to search.
 * cursor - in/out: the search key starts at *cursor; on success *cursor
 *          is set to the end of the returned block (offset + size).
 * size   - length of the block wanted.
 * align  - each candidate segment start is rounded up with P2ROUNDUP()
 *          to this value before the fit test.
 *
 * The walk begins at the node matching [*cursor, *cursor + size), or the
 * nearest node after it, and moves forward with AVL_NEXT(). Returns the
 * aligned offset of the first segment that can hold 'size', or -1ULL if
 * there is none. If a search that began with a non-zero cursor finds
 * nothing, the cursor is reset to 0 and the search is repeated once.
 */
static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
318 |
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
319 |
uint64_t align) |
789 | 320 |
{ |
321 |
space_seg_t *ss, ssearch; |
|
322 |
avl_index_t where; |
|
323 |
||
324 |
ssearch.ss_start = *cursor; |
|
325 |
ssearch.ss_end = *cursor + size; |
|
326 |
||
327 |
ss = avl_find(t, &ssearch, &where); |
|
328 |
if (ss == NULL) |
|
329 |
ss = avl_nearest(t, where, AVL_AFTER); |
|
330 |
||
331 |
while (ss != NULL) { |
|
332 |
uint64_t offset = P2ROUNDUP(ss->ss_start, align); |
|
333 |
||
334 |
if (offset + size <= ss->ss_end) { |
|
335 |
*cursor = offset + size; |
|
336 |
return (offset); |
|
337 |
} |
|
338 |
ss = AVL_NEXT(t, ss); |
|
339 |
} |
|
340 |
||
1732 | 341 |
/* |
342 |
* If we know we've searched the whole map (*cursor == 0), give up. |
|
343 |
* Otherwise, reset the cursor to the beginning and try again. |
|
344 |
*/ |
|
345 |
if (*cursor == 0) |
|
346 |
return (-1ULL); |
|
347 |
||
348 |
*cursor = 0; |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
349 |
return (metaslab_block_picker(t, cursor, size, align)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
350 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
351 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
352 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
353 |
* ========================================================================== |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
354 |
* The first-fit block allocator |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
355 |
* ========================================================================== |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
356 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
357 |
/*
 * Set up first-fit allocator state for space map 'sm'.
 *
 * Asserts that no state is attached yet (sm_ppd == NULL), allocates a
 * zeroed array of 64 uint64_t into sm_ppd (KM_SLEEP), and sets sm_pp_root
 * to NULL. metaslab_ff_alloc() indexes sm_ppd as an array of cursors.
 */
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
358 |
metaslab_ff_load(space_map_t *sm) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
359 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
360 |
ASSERT(sm->sm_ppd == NULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
361 |
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
362 |
sm->sm_pp_root = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
363 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
364 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
365 |
/*
 * Release first-fit allocator state for space map 'sm': frees the
 * 64-entry uint64_t array held in sm_ppd and sets sm_ppd to NULL.
 */
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
366 |
metaslab_ff_unload(space_map_t *sm) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
367 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
368 |
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
369 |
sm->sm_ppd = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
370 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
371 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
372 |
/*
 * First-fit allocation of a block of 'size' from space map 'sm'.
 *
 * 'align' is the lowest set bit of 'size' (size & -size). The cursor used
 * is the sm_ppd entry at index highbit(align) - 1, so requests that share
 * an alignment share a cursor. The search runs over sm->sm_root via
 * metaslab_block_picker(); its result (an offset, or -1ULL on failure) is
 * returned unchanged.
 */
static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
373 |
metaslab_ff_alloc(space_map_t *sm, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
374 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
375 |
avl_tree_t *t = &sm->sm_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
376 |
uint64_t align = size & -size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
377 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
378 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
379 |
return (metaslab_block_picker(t, cursor, size, align)); |
1732 | 380 |
} |
381 |
||
382 |
/*
 * Claim hook of the first-fit allocator. Deliberately empty: the body
 * does nothing with 'sm', 'start' or 'size'.
 */
/* ARGSUSED */ |
|
383 |
static void |
|
384 |
metaslab_ff_claim(space_map_t *sm, uint64_t start, uint64_t size) |
|
385 |
{ |
|
386 |
/* No need to update cursor */ |
|
387 |
} |
|
388 |
||
389 |
/*
 * Free hook of the first-fit allocator. Deliberately empty: the body
 * does nothing with 'sm', 'start' or 'size'.
 */
/* ARGSUSED */ |
|
390 |
static void |
|
391 |
metaslab_ff_free(space_map_t *sm, uint64_t start, uint64_t size) |
|
392 |
{ |
|
393 |
/* No need to update cursor */ |
|
394 |
} |
|
395 |
||
396 |
/*
 * Operations vector for the first-fit allocator. The initializer is
 * positional: load, unload, alloc, claim, free, and finally the maxsize
 * slot, which first-fit leaves NULL.
 */
static space_map_ops_t metaslab_ff_ops = { |
|
397 |
metaslab_ff_load, |
|
398 |
metaslab_ff_unload, |
|
399 |
metaslab_ff_alloc, |
|
400 |
metaslab_ff_claim, |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
401 |
metaslab_ff_free, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
402 |
NULL /* maxsize */ |
1732 | 403 |
}; |
404 |
||
405 |
/* |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
406 |
* Dynamic block allocator - |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
407 |
* Uses the first fit allocation scheme until space gets low and then |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
408 |
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
409 |
* and metaslab_df_free_pct to determine when to switch the allocation scheme. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
410 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
411 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
412 |
/*
 * Return the length (ss_end - ss_start) of the last segment in
 * sm->sm_pp_root, or 0 if that tree does not exist or is empty.
 * metaslab_df_load() builds sm_pp_root with metaslab_df_seg_compare(),
 * which orders by length first, so the last node is the largest segment.
 */
uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
413 |
metaslab_df_maxsize(space_map_t *sm) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
414 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
415 |
avl_tree_t *t = sm->sm_pp_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
416 |
space_seg_t *ss; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
417 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
418 |
if (t == NULL || (ss = avl_last(t)) == NULL) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
419 |
return (0ULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
420 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
421 |
return (ss->ss_end - ss->ss_start); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
422 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
423 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
424 |
/*
 * AVL comparator for the size-sorted segment tree. 'x1' and 'x2' are
 * space_seg_t pointers. Segments are ordered by length
 * (ss_end - ss_start); equal lengths are ordered by ss_start.
 * Returns -1, 0 or 1.
 */
static int |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
425 |
metaslab_df_seg_compare(const void *x1, const void *x2) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
426 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
427 |
const space_seg_t *s1 = x1; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
428 |
const space_seg_t *s2 = x2; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
429 |
uint64_t ss_size1 = s1->ss_end - s1->ss_start; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
430 |
uint64_t ss_size2 = s2->ss_end - s2->ss_start; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
431 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
432 |
if (ss_size1 < ss_size2) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
433 |
return (-1); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
434 |
if (ss_size1 > ss_size2) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
435 |
return (1); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
436 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
437 |
if (s1->ss_start < s2->ss_start) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
438 |
return (-1); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
439 |
if (s1->ss_start > s2->ss_start) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
440 |
return (1); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
441 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
442 |
return (0); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
443 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
444 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
445 |
/*
 * Set up dynamic allocator state for space map 'sm'.
 *
 * Asserts sm_ppd == NULL, allocates a zeroed array of 64 uint64_t into
 * sm_ppd, allocates sm_pp_root and creates it as an AVL tree ordered by
 * metaslab_df_seg_compare() and linked through ss_pp_node, then adds every
 * segment currently in sm->sm_root to that tree.
 */
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
446 |
metaslab_df_load(space_map_t *sm) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
447 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
448 |
space_seg_t *ss; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
449 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
450 |
ASSERT(sm->sm_ppd == NULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
451 |
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
452 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
453 |
sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
454 |
avl_create(sm->sm_pp_root, metaslab_df_seg_compare, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
455 |
sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
456 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
457 |
for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
458 |
avl_add(sm->sm_pp_root, ss); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
459 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
460 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
461 |
/*
 * Release dynamic allocator state for space map 'sm'.
 *
 * Frees the 64-entry array in sm_ppd, empties sm_pp_root with
 * avl_destroy_nodes() (the loop body is empty, so the segments themselves
 * are not freed here), then destroys and frees the tree. Both sm_ppd and
 * sm_pp_root are left NULL.
 */
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
462 |
metaslab_df_unload(space_map_t *sm) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
463 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
464 |
void *cookie = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
465 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
466 |
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
467 |
sm->sm_ppd = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
468 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
469 |
while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
470 |
/* tear down the tree */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
471 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
472 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
473 |
avl_destroy(sm->sm_pp_root); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
474 |
kmem_free(sm->sm_pp_root, sizeof (avl_tree_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
475 |
sm->sm_pp_root = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
476 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
477 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
478 |
/*
 * Allocate a block of 'size' from space map 'sm' with the dynamic scheme.
 *
 * The caller must hold sm->sm_lock (asserted). Returns -1ULL immediately
 * if the largest segment reported by metaslab_df_maxsize() is smaller than
 * 'size'. Otherwise the search normally runs over sm->sm_root starting at
 * the cursor chosen by highbit(size & -size) - 1. If the largest segment
 * is below metaslab_df_alloc_threshold, or the free percentage
 * (sm_space * 100 / sm_size) is below metaslab_df_free_pct, the search
 * switches to the size-sorted sm_pp_root tree with that cursor reset to 0.
 *
 * 'align' only selects the cursor; metaslab_block_picker() is always
 * called with an alignment of 1ULL. Returns the picker's result.
 */
static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
479 |
metaslab_df_alloc(space_map_t *sm, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
480 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
481 |
avl_tree_t *t = &sm->sm_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
482 |
uint64_t align = size & -size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
483 |
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
484 |
uint64_t max_size = metaslab_df_maxsize(sm); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
485 |
int free_pct = sm->sm_space * 100 / sm->sm_size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
486 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
487 |
ASSERT(MUTEX_HELD(sm->sm_lock)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
488 |
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
489 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
490 |
if (max_size < size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
491 |
return (-1ULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
492 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
493 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
494 |
* If we're running low on space switch to using the size |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
495 |
* sorted AVL tree (best-fit). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
496 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
497 |
if (max_size < metaslab_df_alloc_threshold || |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
498 |
free_pct < metaslab_df_free_pct) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
499 |
t = sm->sm_pp_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
500 |
*cursor = 0; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
501 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
502 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
503 |
return (metaslab_block_picker(t, cursor, size, 1ULL)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
504 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
505 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
506 |
/*
 * Claim hook of the dynamic allocator. Deliberately empty: the body
 * does nothing with 'sm', 'start' or 'size'.
 */
/* ARGSUSED */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
507 |
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
508 |
metaslab_df_claim(space_map_t *sm, uint64_t start, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
509 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
510 |
/* No need to update cursor */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
511 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
512 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
513 |
/*
 * Free hook of the dynamic allocator. Deliberately empty: the body
 * does nothing with 'sm', 'start' or 'size'.
 */
/* ARGSUSED */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
514 |
static void |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
515 |
metaslab_df_free(space_map_t *sm, uint64_t start, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
516 |
{ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
517 |
/* No need to update cursor */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
518 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
519 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
520 |
/*
 * Operations vector for the dynamic allocator. The initializer is
 * positional: load, unload, alloc, claim, free, and metaslab_df_maxsize
 * in the final slot.
 */
static space_map_ops_t metaslab_df_ops = { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
521 |
metaslab_df_load, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
522 |
metaslab_df_unload, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
523 |
metaslab_df_alloc, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
524 |
metaslab_df_claim, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
525 |
metaslab_df_free, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
526 |
metaslab_df_maxsize |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
527 |
}; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
528 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
529 |
/*
 * Global pointer to an allocator operations vector, initialized to the
 * dynamic allocator (metaslab_df_ops).
 * NOTE(review): presumably consumers pick their allocator through this
 * pointer -- no user of it is visible in this part of the file; confirm.
 */
space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
530 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
531 |
/* |
1732 | 532 |
* ========================================================================== |
533 |
* Metaslabs |
|
534 |
* ========================================================================== |
|
535 |
*/ |
|
536 |
/*
 * Allocate and set up a new metaslab in group 'mg' and return it.
 *
 * mg    - group the metaslab is added to; mg->mg_vd supplies the vdev.
 * smo   - space map object; copied into ms_smo_syncing.
 * start - passed with 'size' to space_map_create() for the main map.
 * size  - see 'start'.
 * txg   - 0, TXG_INITIAL, or a later txg (see the comment in the body).
 *
 * Steps: zero-allocate the metaslab, initialize ms_lock, create ms_map
 * with the vdev's ashift, and add the metaslab to the group. If
 * metaslab_debug is set and smo->smo_object is non-zero, the space map is
 * loaded right away under ms_lock (VERIFY on success). When
 * txg <= TXG_INITIAL, metaslab_sync_done(msp, 0) is called; when
 * txg != 0, the vdev is dirtied for 'txg', once with flags 0 and once
 * with VDD_METASLAB for this metaslab.
 */
metaslab_t * |
|
537 |
metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, |
|
538 |
uint64_t start, uint64_t size, uint64_t txg) |
|
539 |
{ |
|
540 |
vdev_t *vd = mg->mg_vd; |
|
541 |
metaslab_t *msp; |
|
542 |
||
543 |
msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP); |
|
2856 | 544 |
mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL); |
1732 | 545 |
|
546 |
msp->ms_smo_syncing = *smo; |
|
547 |
||
548 |
/* |
|
549 |
* We create the main space map here, but we don't create the |
|
550 |
* allocmaps and freemaps until metaslab_sync_done(). This serves |
|
551 |
* two purposes: it allows metaslab_sync_done() to detect the |
|
552 |
* addition of new space; and for debugging, it ensures that we'd |
|
553 |
* data fault on any attempt to use this metaslab before it's ready. |
|
554 |
*/ |
|
555 |
space_map_create(&msp->ms_map, start, size, |
|
556 |
vd->vdev_ashift, &msp->ms_lock); |
|
557 |
||
558 |
metaslab_group_add(mg, msp); |
|
559 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
560 |
if (metaslab_debug && smo->smo_object != 0) { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
561 |
mutex_enter(&msp->ms_lock); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
562 |
VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
563 |
SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
564 |
mutex_exit(&msp->ms_lock); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
565 |
} |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
566 |
|
1732 | 567 |
/* |
568 |
* If we're opening an existing pool (txg == 0) or creating |
|
569 |
* a new one (txg == TXG_INITIAL), all space is available now. |
|
570 |
* If we're adding space to an existing pool, the new space |
|
571 |
* does not become available until after this txg has synced. |
|
572 |
*/ |
|
573 |
if (txg <= TXG_INITIAL) |
|
574 |
metaslab_sync_done(msp, 0); |
|
575 |
||
576 |
if (txg != 0) { |
|
577 |
vdev_dirty(vd, 0, NULL, txg); |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
578 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
789 | 579 |
} |
580 |
||
1732 | 581 |
return (msp); |
582 |
} |
|
583 |
||
584 |
/*
 * Tear down and free metaslab 'msp'.
 *
 * Calls vdev_space_update() on the group's vdev with the negated
 * ms_smo.smo_alloc and the negated ms_map.sm_size, then removes the
 * metaslab from its group. Under ms_lock it unloads and destroys ms_map,
 * destroys the TXG_SIZE allocmaps and freemaps and the TXG_DEFER_SIZE
 * defermaps, and asserts ms_deferspace == 0. Finally the lock is
 * destroyed and the metaslab_t is freed.
 */
void |
|
585 |
metaslab_fini(metaslab_t *msp) |
|
586 |
{ |
|
587 |
metaslab_group_t *mg = msp->ms_group; |
|
588 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
589 |
vdev_space_update(mg->mg_vd, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
590 |
-msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); |
1732 | 591 |
|
592 |
metaslab_group_remove(mg, msp); |
|
593 |
||
594 |
mutex_enter(&msp->ms_lock); |
|
595 |
||
596 |
space_map_unload(&msp->ms_map); |
|
597 |
space_map_destroy(&msp->ms_map); |
|
598 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
599 |
for (int t = 0; t < TXG_SIZE; t++) { |
1732 | 600 |
space_map_destroy(&msp->ms_allocmap[t]); |
601 |
space_map_destroy(&msp->ms_freemap[t]); |
|
602 |
} |
|
603 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
604 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
605 |
space_map_destroy(&msp->ms_defermap[t]); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
606 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
607 |
ASSERT3S(msp->ms_deferspace, ==, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
608 |
|
1732 | 609 |
mutex_exit(&msp->ms_lock); |
2856 | 610 |
mutex_destroy(&msp->ms_lock); |
1732 | 611 |
|
612 |
kmem_free(msp, sizeof (metaslab_t)); |
|
789 | 613 |
} |
614 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
615 |
/*
 * The two high-order bits of a metaslab's 64-bit weight are activation
 * flags rather than part of the numeric weight.  metaslab_activate() ORs
 * one of them into ms_weight, and metaslab_weight() carries whichever bits
 * are currently set over into the freshly computed weight.
 */
#define	METASLAB_WEIGHT_PRIMARY		(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
#define	METASLAB_ACTIVE_MASK		\
	(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)

/*
 * Factor by which metaslab_weight() multiplies the weight of a metaslab
 * that already has an on-disk space map object (smo_object != 0).
 */
#define	METASLAB_SMO_BONUS_MULTIPLIER	2
1732 | 620 |
|
789 | 621 |
static uint64_t |
1732 | 622 |
metaslab_weight(metaslab_t *msp) |
623 |
{ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
624 |
metaslab_group_t *mg = msp->ms_group; |
1732 | 625 |
space_map_t *sm = &msp->ms_map; |
626 |
space_map_obj_t *smo = &msp->ms_smo; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
627 |
vdev_t *vd = mg->mg_vd; |
1732 | 628 |
uint64_t weight, space; |
629 |
||
630 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
|
631 |
||
632 |
/* |
|
633 |
* The baseline weight is the metaslab's free space. |
|
634 |
*/ |
|
635 |
space = sm->sm_size - smo->smo_alloc; |
|
636 |
weight = space; |
|
637 |
||
638 |
/* |
|
639 |
* Modern disks have uniform bit density and constant angular velocity. |
|
640 |
* Therefore, the outer recording zones are faster (higher bandwidth) |
|
641 |
* than the inner zones by the ratio of outer to inner track diameter, |
|
642 |
* which is typically around 2:1. We account for this by assigning |
|
643 |
* higher weight to lower metaslabs (multiplier ranging from 2x to 1x). |
|
644 |
* In effect, this means that we'll select the metaslab with the most |
|
645 |
* free bandwidth rather than simply the one with the most free space. |
|
646 |
*/ |
|
647 |
weight = 2 * weight - |
|
648 |
((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count; |
|
649 |
ASSERT(weight >= space && weight <= 2 * space); |
|
650 |
||
651 |
/* |
|
652 |
* For locality, assign higher weight to metaslabs we've used before. |
|
653 |
*/ |
|
654 |
if (smo->smo_object != 0) |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
655 |
weight *= METASLAB_SMO_BONUS_MULTIPLIER; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
656 |
ASSERT(weight >= space && |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
657 |
weight <= 2 * METASLAB_SMO_BONUS_MULTIPLIER * space); |
1732 | 658 |
|
659 |
/* |
|
660 |
* If this metaslab is one we're actively using, adjust its weight to |
|
661 |
* make it preferable to any inactive metaslab so we'll polish it off. |
|
662 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
663 |
weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK); |
1732 | 664 |
|
665 |
return (weight); |
|
666 |
} |
|
667 |
||
668 |
static int |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
669 |
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) |
789 | 670 |
{ |
671 |
space_map_t *sm = &msp->ms_map; |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
672 |
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; |
789 | 673 |
|
674 |
ASSERT(MUTEX_HELD(&msp->ms_lock)); |
|
1732 | 675 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
676 |
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
677 |
space_map_load_wait(sm); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
678 |
if (!sm->sm_loaded) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
679 |
int error = space_map_load(sm, sm_ops, SM_FREE, |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
680 |
&msp->ms_smo, |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
681 |
spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
682 |
if (error) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
683 |
metaslab_group_sort(msp->ms_group, msp, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
684 |
return (error); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
685 |
} |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
686 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
687 |
space_map_walk(&msp->ms_defermap[t], |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
688 |
space_map_claim, sm); |
1732 | 689 |
} |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
690 |
|
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
691 |
/* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
692 |
* If we were able to load the map then make sure |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
693 |
* that this map is still able to satisfy our request. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
694 |
*/ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
695 |
if (msp->ms_weight < size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
696 |
return (ENOSPC); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
697 |
|
1732 | 698 |
metaslab_group_sort(msp->ms_group, msp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
699 |
msp->ms_weight | activation_weight); |
1732 | 700 |
} |
701 |
ASSERT(sm->sm_loaded); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
702 |
ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); |
1732 | 703 |
|
704 |
return (0); |
|
705 |
} |
|
706 |
||
707 |
static void |
|
708 |
metaslab_passivate(metaslab_t *msp, uint64_t size) |
|
709 |
{ |
|
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
710 |
/* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
711 |
* If size < SPA_MINBLOCKSIZE, then we will not allocate from |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
712 |
* this metaslab again. In that case, it had better be empty, |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
713 |
* or we would be leaving space on the table. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
714 |
*/ |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
715 |
#if 0 |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
716 |
ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
717 |
#endif |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
718 |
metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size)); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
719 |
ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); |
1732 | 720 |
} |
721 |
||
722 |
/* |
|
723 |
* Write a metaslab to disk in the context of the specified transaction group. |
|
724 |
*/ |
|
725 |
void |
|
726 |
metaslab_sync(metaslab_t *msp, uint64_t txg) |
|
727 |
{ |
|
728 |
vdev_t *vd = msp->ms_group->mg_vd; |
|
729 |
spa_t *spa = vd->vdev_spa; |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
730 |
objset_t *mos = spa_meta_objset(spa); |
1732 | 731 |
space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK]; |
732 |
space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK]; |
|
733 |
space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
|
734 |
space_map_t *sm = &msp->ms_map; |
|
735 |
space_map_obj_t *smo = &msp->ms_smo_syncing; |
|
736 |
dmu_buf_t *db; |
|
737 |
dmu_tx_t *tx; |
|
738 |
||
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
739 |
ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
740 |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
741 |
if (allocmap->sm_space == 0 && freemap->sm_space == 0) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
742 |
return; |
1732 | 743 |
|
744 |
/* |
|
745 |
* The only state that can actually be changing concurrently with |
|
746 |
* metaslab_sync() is the metaslab's ms_map. No other thread can |
|
747 |
* be modifying this txg's allocmap, freemap, freed_map, or smo. |
|
748 |
* Therefore, we only hold ms_lock to satify space_map ASSERTs. |
|
749 |
* We drop it whenever we call into the DMU, because the DMU |
|
750 |
* can call down to us (e.g. via zio_free()) at any time. |
|
751 |
*/ |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
752 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
753 |
tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); |
1732 | 754 |
|
755 |
if (smo->smo_object == 0) { |
|
756 |
ASSERT(smo->smo_objsize == 0); |
|
757 |
ASSERT(smo->smo_alloc == 0); |
|
758 |
smo->smo_object = dmu_object_alloc(mos, |
|
759 |
DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, |
|
760 |
DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); |
|
761 |
ASSERT(smo->smo_object != 0); |
|
762 |
dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) * |
|
763 |
(sm->sm_start >> vd->vdev_ms_shift), |
|
764 |
sizeof (uint64_t), &smo->smo_object, tx); |
|
765 |
} |
|
766 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
767 |
mutex_enter(&msp->ms_lock); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
768 |
|
1732 | 769 |
space_map_walk(freemap, space_map_add, freed_map); |
770 |
||
771 |
if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >= |
|
772 |
2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) { |
|
773 |
/* |
|
774 |
* The in-core space map representation is twice as compact |
|
775 |
* as the on-disk one, so it's time to condense the latter |
|
776 |
* by generating a pure allocmap from first principles. |
|
777 |
* |
|
778 |
* This metaslab is 100% allocated, |
|
779 |
* minus the content of the in-core map (sm), |
|
780 |
* minus what's been freed this txg (freed_map), |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
781 |
* minus deferred frees (ms_defermap[]), |
1732 | 782 |
* minus allocations from txgs in the future |
783 |
* (because they haven't been committed yet). |
|
784 |
*/ |
|
785 |
space_map_vacate(allocmap, NULL, NULL); |
|
786 |
space_map_vacate(freemap, NULL, NULL); |
|
787 |
||
788 |
space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size); |
|
789 | 789 |
|
1732 | 790 |
space_map_walk(sm, space_map_remove, allocmap); |
791 |
space_map_walk(freed_map, space_map_remove, allocmap); |
|
792 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
793 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
794 |
space_map_walk(&msp->ms_defermap[t], |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
795 |
space_map_remove, allocmap); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
796 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
797 |
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
1732 | 798 |
space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK], |
799 |
space_map_remove, allocmap); |
|
800 |
||
801 |
mutex_exit(&msp->ms_lock); |
|
802 |
space_map_truncate(smo, mos, tx); |
|
803 |
mutex_enter(&msp->ms_lock); |
|
789 | 804 |
} |
1732 | 805 |
|
806 |
space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); |
|
807 |
space_map_sync(freemap, SM_FREE, smo, mos, tx); |
|
808 |
||
809 |
mutex_exit(&msp->ms_lock); |
|
810 |
||
811 |
VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); |
|
812 |
dmu_buf_will_dirty(db, tx); |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
813 |
ASSERT3U(db->db_size, >=, sizeof (*smo)); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
814 |
bcopy(smo, db->db_data, sizeof (*smo)); |
1732 | 815 |
dmu_buf_rele(db, FTAG); |
816 |
||
817 |
dmu_tx_commit(tx); |
|
818 |
} |
|
819 |
||
820 |
/* |
|
821 |
* Called after a transaction group has completely synced to mark |
|
822 |
* all of the metaslab's free space as usable. |
|
823 |
*/ |
|
824 |
void |
|
825 |
metaslab_sync_done(metaslab_t *msp, uint64_t txg) |
|
826 |
{ |
|
827 |
space_map_obj_t *smo = &msp->ms_smo; |
|
828 |
space_map_obj_t *smosync = &msp->ms_smo_syncing; |
|
829 |
space_map_t *sm = &msp->ms_map; |
|
830 |
space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
831 |
space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; |
1732 | 832 |
metaslab_group_t *mg = msp->ms_group; |
833 |
vdev_t *vd = mg->mg_vd; |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
834 |
int64_t alloc_delta, defer_delta; |
1732 | 835 |
|
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
836 |
ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
837 |
|
1732 | 838 |
mutex_enter(&msp->ms_lock); |
839 |
||
840 |
/* |
|
841 |
* If this metaslab is just becoming available, initialize its |
|
842 |
* allocmaps and freemaps and add its capacity to the vdev. |
|
843 |
*/ |
|
844 |
if (freed_map->sm_size == 0) { |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
845 |
for (int t = 0; t < TXG_SIZE; t++) { |
1732 | 846 |
space_map_create(&msp->ms_allocmap[t], sm->sm_start, |
847 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
|
848 |
space_map_create(&msp->ms_freemap[t], sm->sm_start, |
|
849 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
|
850 |
} |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
851 |
|
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
852 |
for (int t = 0; t < TXG_DEFER_SIZE; t++) |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
853 |
space_map_create(&msp->ms_defermap[t], sm->sm_start, |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
854 |
sm->sm_size, sm->sm_shift, sm->sm_lock); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
855 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
856 |
vdev_space_update(vd, 0, 0, sm->sm_size); |
1732 | 857 |
} |
858 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
859 |
alloc_delta = smosync->smo_alloc - smo->smo_alloc; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
860 |
defer_delta = freed_map->sm_space - defer_map->sm_space; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
861 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
862 |
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0); |
1732 | 863 |
|
864 |
ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0); |
|
865 |
ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0); |
|
866 |
||
867 |
/* |
|
868 |
* If there's a space_map_load() in progress, wait for it to complete |
|
869 |
* so that we have a consistent view of the in-core space map. |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
870 |
* Then, add defer_map (oldest deferred frees) to this map and |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
871 |
* transfer freed_map (this txg's frees) to defer_map. |
1732 | 872 |
*/ |
873 |
space_map_load_wait(sm); |
|
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
874 |
space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
875 |
space_map_vacate(freed_map, space_map_add, defer_map); |
1732 | 876 |
|
877 |
*smo = *smosync; |
|
878 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
879 |
msp->ms_deferspace += defer_delta; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
880 |
ASSERT3S(msp->ms_deferspace, >=, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
881 |
ASSERT3S(msp->ms_deferspace, <=, sm->sm_size); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
882 |
if (msp->ms_deferspace != 0) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
883 |
/* |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
884 |
* Keep syncing this metaslab until all deferred frees |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
885 |
* are back in circulation. |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
886 |
*/ |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
887 |
vdev_dirty(vd, VDD_METASLAB, msp, txg + 1); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
888 |
} |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
889 |
|
1732 | 890 |
/* |
891 |
* If the map is loaded but no longer active, evict it as soon as all |
|
892 |
* future allocations have synced. (If we unloaded it now and then |
|
893 |
* loaded a moment later, the map wouldn't reflect those allocations.) |
|
894 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
895 |
if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
1732 | 896 |
int evictable = 1; |
897 |
||
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
898 |
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
1732 | 899 |
if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space) |
900 |
evictable = 0; |
|
901 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
902 |
if (evictable && !metaslab_debug) |
1732 | 903 |
space_map_unload(sm); |
904 |
} |
|
905 |
||
906 |
metaslab_group_sort(mg, msp, metaslab_weight(msp)); |
|
907 |
||
908 |
mutex_exit(&msp->ms_lock); |
|
789 | 909 |
} |
910 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
911 |
static uint64_t |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
912 |
metaslab_distance(metaslab_t *msp, dva_t *dva) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
913 |
{ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
914 |
uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
915 |
uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
916 |
uint64_t start = msp->ms_map.sm_start >> ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
917 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
918 |
if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
919 |
return (1ULL << 63); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
920 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
921 |
if (offset < start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
922 |
return ((start - offset) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
923 |
if (offset > start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
924 |
return ((offset - start) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
925 |
return (0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
926 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
927 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
928 |
static uint64_t |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
929 |
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
930 |
uint64_t min_distance, dva_t *dva, int d) |
789 | 931 |
{ |
1732 | 932 |
metaslab_t *msp = NULL; |
933 |
uint64_t offset = -1ULL; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
934 |
avl_tree_t *t = &mg->mg_metaslab_tree; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
935 |
uint64_t activation_weight; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
936 |
uint64_t target_distance; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
937 |
int i; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
938 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
939 |
activation_weight = METASLAB_WEIGHT_PRIMARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
940 |
for (i = 0; i < d; i++) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
941 |
if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
942 |
activation_weight = METASLAB_WEIGHT_SECONDARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
943 |
break; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
944 |
} |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
945 |
} |
789 | 946 |
|
1732 | 947 |
for (;;) { |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
948 |
boolean_t was_active; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
949 |
|
1732 | 950 |
mutex_enter(&mg->mg_lock); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
951 |
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
952 |
if (msp->ms_weight < size) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
953 |
mutex_exit(&mg->mg_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
954 |
return (-1ULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
955 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
956 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
957 |
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
958 |
if (activation_weight == METASLAB_WEIGHT_PRIMARY) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
959 |
break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
960 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
961 |
target_distance = min_distance + |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
962 |
(msp->ms_smo.smo_alloc ? 0 : min_distance >> 1); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
963 |
|
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
964 |
for (i = 0; i < d; i++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
965 |
if (metaslab_distance(msp, &dva[i]) < |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
966 |
target_distance) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
967 |
break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
968 |
if (i == d) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
969 |
break; |
1732 | 970 |
} |
971 |
mutex_exit(&mg->mg_lock); |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
972 |
if (msp == NULL) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
973 |
return (-1ULL); |
789 | 974 |
|
975 |
mutex_enter(&msp->ms_lock); |
|
1732 | 976 |
|
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
977 |
/* |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
978 |
* Ensure that the metaslab we have selected is still |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
979 |
* capable of handling our request. It's possible that |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
980 |
* another thread may have changed the weight while we |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
981 |
* were blocked on the metaslab lock. |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
982 |
*/ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
983 |
if (msp->ms_weight < size || (was_active && |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
984 |
!(msp->ms_weight & METASLAB_ACTIVE_MASK) && |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
985 |
activation_weight == METASLAB_WEIGHT_PRIMARY)) { |
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
986 |
mutex_exit(&msp->ms_lock); |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
987 |
continue; |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
988 |
} |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
989 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
990 |
if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) && |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
991 |
activation_weight == METASLAB_WEIGHT_PRIMARY) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
992 |
metaslab_passivate(msp, |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
993 |
msp->ms_weight & ~METASLAB_ACTIVE_MASK); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
994 |
mutex_exit(&msp->ms_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
995 |
continue; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
996 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
997 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
998 |
if (metaslab_activate(msp, activation_weight, size) != 0) { |
789 | 999 |
mutex_exit(&msp->ms_lock); |
1000 |
continue; |
|
1001 |
} |
|
1732 | 1002 |
|
1003 |
if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) |
|
1004 |
break; |
|
1005 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1006 |
metaslab_passivate(msp, size - 1); |
1732 | 1007 |
|
789 | 1008 |
mutex_exit(&msp->ms_lock); |
1009 |
} |
|
1010 |
||
1732 | 1011 |
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) |
1012 |
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); |
|
1013 |
||
1014 |
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); |
|
1015 |
||
1016 |
mutex_exit(&msp->ms_lock); |
|
1017 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1018 |
return (offset); |
789 | 1019 |
} |
1020 |
||
1021 |
/* |
|
1022 |
* Allocate a block for the specified i/o. |
|
1023 |
*/ |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1024 |
static int |
4527 | 1025 |
metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1026 |
dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) |
789 | 1027 |
{ |
1028 |
metaslab_group_t *mg, *rotor; |
|
1029 |
vdev_t *vd; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1030 |
int dshift = 3; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1031 |
int all_zero; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1032 |
int zio_lock = B_FALSE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1033 |
boolean_t allocatable; |
789 | 1034 |
uint64_t offset = -1ULL; |
1035 |
uint64_t asize; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1036 |
uint64_t distance; |
789 | 1037 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1038 |
ASSERT(!DVA_IS_VALID(&dva[d])); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1039 |
|
789 | 1040 |
/* |
5530 | 1041 |
* For testing, make some blocks above a certain size be gang blocks. |
1042 |
*/ |
|
1043 |
if (psize >= metaslab_gang_bang && (lbolt & 3) == 0) |
|
1044 |
return (ENOSPC); |
|
1045 |
||
1046 |
/* |
|
789 | 1047 |
* Start at the rotor and loop through all mgs until we find something. |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1048 |
* Note that there's no locking on mc_rotor or mc_aliquot because |
789 | 1049 |
* nothing actually breaks if we miss a few updates -- we just won't |
1050 |
* allocate quite as evenly. It all balances out over time. |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1051 |
* |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1052 |
* If we are doing ditto or log blocks, try to spread them across |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1053 |
* consecutive vdevs. If we're forced to reuse a vdev before we've |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1054 |
* allocated all of our ditto blocks, then try and spread them out on |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1055 |
* that vdev as much as possible. If it turns out to not be possible, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1056 |
* gradually lower our standards until anything becomes acceptable. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1057 |
* Also, allocating on consecutive vdevs (as opposed to random vdevs) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1058 |
* gives us hope of containing our fault domains to something we're |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1059 |
* able to reason about. Otherwise, any two top-level vdev failures |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1060 |
* will guarantee the loss of data. With consecutive allocation, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1061 |
* only two adjacent top-level vdev failures will result in data loss. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1062 |
* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1063 |
* If we are doing gang blocks (hintdva is non-NULL), try to keep |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1064 |
* ourselves on the same vdev as our gang block header. That |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1065 |
* way, we can hope for locality in vdev_cache, plus it makes our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1066 |
* fault domains something tractable. |
789 | 1067 |
*/ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1068 |
if (hintdva) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1069 |
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d])); |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1070 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1071 |
/* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1072 |
* It's possible the vdev we're using as the hint no |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1073 |
* longer exists (i.e. removed). Consult the rotor when |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1074 |
* all else fails. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1075 |
*/ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1076 |
if (vd != NULL) { |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1077 |
mg = vd->vdev_mg; |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1078 |
|
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1079 |
if (flags & METASLAB_HINTBP_AVOID && |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1080 |
mg->mg_next != NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1081 |
mg = mg->mg_next; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1082 |
} else { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1083 |
mg = mc->mc_rotor; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1084 |
} |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1085 |
} else if (d != 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1086 |
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1087 |
mg = vd->vdev_mg->mg_next; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1088 |
} else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1089 |
mg = mc->mc_rotor; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1090 |
} |
4527 | 1091 |
|
1092 |
/* |
|
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1093 |
* If the hint put us into the wrong metaslab class, or into a |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1094 |
* metaslab group that has been passivated, just follow the rotor. |
4527 | 1095 |
*/ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1096 |
if (mg->mg_class != mc || mg->mg_activation_count <= 0) |
4527 | 1097 |
mg = mc->mc_rotor; |
1098 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1099 |
rotor = mg; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1100 |
top: |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1101 |
all_zero = B_TRUE; |
789 | 1102 |
do { |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1103 |
ASSERT(mg->mg_activation_count == 1); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1104 |
|
789 | 1105 |
vd = mg->mg_vd; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1106 |
|
5329 | 1107 |
/* |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1108 |
* Don't allocate from faulted devices. |
5329 | 1109 |
*/ |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1110 |
if (zio_lock) { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1111 |
spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1112 |
allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1113 |
spa_config_exit(spa, SCL_ZIO, FTAG); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1114 |
} else { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1115 |
allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1116 |
} |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1117 |
if (!allocatable) |
5329 | 1118 |
goto next; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1119 |
|
5329 | 1120 |
/* |
1121 |
* Avoid writing single-copy data to a failing vdev |
|
1122 |
*/ |
|
1123 |
if ((vd->vdev_stat.vs_write_errors > 0 || |
|
1124 |
vd->vdev_state < VDEV_STATE_HEALTHY) && |
|
1125 |
d == 0 && dshift == 3) { |
|
1126 |
all_zero = B_FALSE; |
|
1127 |
goto next; |
|
1128 |
} |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1129 |
|
4527 | 1130 |
ASSERT(mg->mg_class == mc); |
1131 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1132 |
distance = vd->vdev_asize >> dshift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1133 |
if (distance <= (1ULL << vd->vdev_ms_shift)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1134 |
distance = 0; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1135 |
else |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1136 |
all_zero = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1137 |
|
789 | 1138 |
asize = vdev_psize_to_asize(vd, psize); |
1139 |
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); |
|
1140 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1141 |
offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1142 |
if (offset != -1ULL) { |
789 | 1143 |
/* |
1144 |
* If we've just selected this metaslab group, |
|
1145 |
* figure out whether the corresponding vdev is |
|
1146 |
* over- or under-used relative to the pool, |
|
1147 |
* and set an allocation bias to even it out. |
|
1148 |
*/ |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1149 |
if (mc->mc_aliquot == 0) { |
789 | 1150 |
vdev_stat_t *vs = &vd->vdev_stat; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1151 |
int64_t vu, cu; |
789 | 1152 |
|
1153 |
/* |
|
1154 |
* Determine percent used in units of 0..1024. |
|
1155 |
* (This is just to avoid floating point.) |
|
1156 |
*/ |
|
1157 |
vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1158 |
cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); |
789 | 1159 |
|
1160 |
/* |
|
1161 |
* Bias by at most +/- 25% of the aliquot. |
|
1162 |
*/ |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1163 |
mg->mg_bias = ((cu - vu) * |
789 | 1164 |
(int64_t)mg->mg_aliquot) / (1024 * 4); |
1165 |
} |
|
1166 |
||
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1167 |
if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= |
789 | 1168 |
mg->mg_aliquot + mg->mg_bias) { |
1169 |
mc->mc_rotor = mg->mg_next; |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1170 |
mc->mc_aliquot = 0; |
789 | 1171 |
} |
1172 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1173 |
DVA_SET_VDEV(&dva[d], vd->vdev_id); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1174 |
DVA_SET_OFFSET(&dva[d], offset); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1175 |
DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1176 |
DVA_SET_ASIZE(&dva[d], asize); |
789 | 1177 |
|
1178 |
return (0); |
|
1179 |
} |
|
5329 | 1180 |
next: |
789 | 1181 |
mc->mc_rotor = mg->mg_next; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1182 |
mc->mc_aliquot = 0; |
789 | 1183 |
} while ((mg = mg->mg_next) != rotor); |
1184 |
||
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1185 |
if (!all_zero) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1186 |
dshift++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1187 |
ASSERT(dshift < 64); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1188 |
goto top; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1189 |
} |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1190 |
|
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1191 |
if (!allocatable && !zio_lock) { |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1192 |
dshift = 3; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1193 |
zio_lock = B_TRUE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1194 |
goto top; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1195 |
} |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1196 |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1197 |
bzero(&dva[d], sizeof (dva_t)); |
789 | 1198 |
|
1199 |
return (ENOSPC); |
|
1200 |
} |
|
1201 |
||
1202 |
/* |
|
1203 |
* Free the block represented by DVA in the context of the specified |
|
1204 |
* transaction group. |
|
1205 |
*/ |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1206 |
static void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1207 |
metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) |
789 | 1208 |
{ |
1209 |
uint64_t vdev = DVA_GET_VDEV(dva); |
|
1210 |
uint64_t offset = DVA_GET_OFFSET(dva); |
|
1211 |
uint64_t size = DVA_GET_ASIZE(dva); |
|
1212 |
vdev_t *vd; |
|
1213 |
metaslab_t *msp; |
|
1214 |
||
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1215 |
ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1216 |
|
789 | 1217 |
if (txg > spa_freeze_txg(spa)) |
1218 |
return; |
|
1219 |
||
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1220 |
if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1221 |
(offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1222 |
cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu", |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1223 |
(u_longlong_t)vdev, (u_longlong_t)offset); |
789 | 1224 |
ASSERT(0); |
1225 |
return; |
|
1226 |
} |
|
1227 |
||
1228 |
msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; |
|
1229 |
||
1230 |
if (DVA_GET_GANG(dva)) |
|
1231 |
size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); |
|
1232 |
||
1233 |
mutex_enter(&msp->ms_lock); |
|
1234 |
||
1732 | 1235 |
if (now) { |
1236 |
space_map_remove(&msp->ms_allocmap[txg & TXG_MASK], |
|
1237 |
offset, size); |
|
1238 |
space_map_free(&msp->ms_map, offset, size); |
|
1239 |
} else { |
|
1240 |
if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0) |
|
1241 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
|
1242 |
space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size); |
|
789 | 1243 |
} |
1244 |
||
1245 |
mutex_exit(&msp->ms_lock); |
|
1246 |
} |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1247 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1248 |
/* |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1249 |
* Intent log support: upon opening the pool after a crash, notify the SPA |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1250 |
* of blocks that the intent log has allocated for immediate write, but |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1251 |
* which are still considered free by the SPA because the last transaction |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1252 |
* group didn't commit yet. |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1253 |
*/ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1254 |
static int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1255 |
metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1256 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1257 |
uint64_t vdev = DVA_GET_VDEV(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1258 |
uint64_t offset = DVA_GET_OFFSET(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1259 |
uint64_t size = DVA_GET_ASIZE(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1260 |
vdev_t *vd; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1261 |
metaslab_t *msp; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1262 |
int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1263 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1264 |
ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1265 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1266 |
if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1267 |
(offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1268 |
return (ENXIO); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1269 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1270 |
msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1271 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1272 |
if (DVA_GET_GANG(dva)) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1273 |
size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1274 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1275 |
mutex_enter(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1276 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1277 |
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1278 |
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1279 |
|
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1280 |
if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1281 |
error = ENOENT; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1282 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1283 |
if (error || txg == 0) { /* txg == 0 indicates dry run */ |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1284 |
mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1285 |
return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1286 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1287 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1288 |
space_map_claim(&msp->ms_map, offset, size); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1289 |
|
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1290 |
if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1291 |
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1292 |
vdev_dirty(vd, VDD_METASLAB, msp, txg); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1293 |
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1294 |
} |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1295 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1296 |
mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1297 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1298 |
return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1299 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1300 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1301 |
int |
4527 | 1302 |
metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1303 |
int ndvas, uint64_t txg, blkptr_t *hintbp, int flags) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1304 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1305 |
dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1306 |
dva_t *hintdva = hintbp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1307 |
int error = 0; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1308 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1309 |
ASSERT(bp->blk_birth == 0); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1310 |
ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1311 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1312 |
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1313 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1314 |
if (mc->mc_rotor == NULL) { /* no vdevs in this class */ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1315 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
4527 | 1316 |
return (ENOSPC); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1317 |
} |
4527 | 1318 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1319 |
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1320 |
ASSERT(BP_GET_NDVAS(bp) == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1321 |
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1322 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1323 |
for (int d = 0; d < ndvas; d++) { |
4527 | 1324 |
error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1325 |
txg, flags); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1326 |
if (error) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1327 |
for (d--; d >= 0; d--) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1328 |
metaslab_free_dva(spa, &dva[d], txg, B_TRUE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1329 |
bzero(&dva[d], sizeof (dva_t)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1330 |
} |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1331 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1332 |
return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1333 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1334 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1335 |
ASSERT(error == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1336 |
ASSERT(BP_GET_NDVAS(bp) == ndvas); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1337 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1338 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1339 |
|
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1340 |
BP_SET_BIRTH(bp, txg, txg); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1341 |
|
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1342 |
return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1343 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1344 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1345 |
void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1346 |
metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1347 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1348 |
const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1349 |
int ndvas = BP_GET_NDVAS(bp); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1350 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1351 |
ASSERT(!BP_IS_HOLE(bp)); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1352 |
ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1353 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1354 |
spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1355 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1356 |
for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1357 |
metaslab_free_dva(spa, &dva[d], txg, now); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1358 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1359 |
spa_config_exit(spa, SCL_FREE, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1360 |
} |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1361 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1362 |
int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1363 |
metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1364 |
{ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1365 |
const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1366 |
int ndvas = BP_GET_NDVAS(bp); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1367 |
int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1368 |
|
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1369 |
ASSERT(!BP_IS_HOLE(bp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1370 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1371 |
if (txg != 0) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1372 |
/* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1373 |
* First do a dry run to make sure all DVAs are claimable, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1374 |
* so we don't have to unwind from partial failures below. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1375 |
*/ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1376 |
if ((error = metaslab_claim(spa, bp, 0)) != 0) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1377 |
return (error); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1378 |
} |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1379 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1380 |
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1381 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1382 |
for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1383 |
if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1384 |
break; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1385 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1386 |
spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1387 |
|
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1388 |
ASSERT(error == 0 || txg == 0); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1389 |
|
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1390 |
return (error); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1391 |
} |