author | ahrens |
Mon, 29 Oct 2007 17:12:17 -0700 | |
changeset 5367 | c40abbe796be |
parent 4944 | 96d96f8de974 |
child 5378 | 111aa1baa84a |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
3978
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
22 |
* Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/dmu.h> |
|
29 |
#include <sys/dmu_tx.h> |
|
30 |
#include <sys/dsl_dataset.h> |
|
31 |
#include <sys/dsl_dir.h> |
|
32 |
#include <sys/dsl_prop.h> |
|
2199 | 33 |
#include <sys/dsl_synctask.h> |
4543 | 34 |
#include <sys/dsl_deleg.h> |
789 | 35 |
#include <sys/spa.h> |
36 |
#include <sys/zap.h> |
|
37 |
#include <sys/zio.h> |
|
38 |
#include <sys/arc.h> |
|
4543 | 39 |
#include <sys/sunddi.h> |
789 | 40 |
#include "zfs_namecheck.h" |
41 |
||
42 |
static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); |
|
4543 | 43 |
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, |
44 |
cred_t *cr, dmu_tx_t *tx); |
|
789 | 45 |
|
46 |
||
47 |
/* ARGSUSED */ |
|
48 |
static void |
|
49 |
dsl_dir_evict(dmu_buf_t *db, void *arg) |
|
50 |
{ |
|
51 |
dsl_dir_t *dd = arg; |
|
52 |
dsl_pool_t *dp = dd->dd_pool; |
|
53 |
int t; |
|
54 |
||
55 |
for (t = 0; t < TXG_SIZE; t++) { |
|
56 |
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); |
|
57 |
ASSERT(dd->dd_tempreserved[t] == 0); |
|
58 |
ASSERT(dd->dd_space_towrite[t] == 0); |
|
59 |
} |
|
60 |
||
61 |
ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); |
|
62 |
||
63 |
if (dd->dd_parent) |
|
64 |
dsl_dir_close(dd->dd_parent, dd); |
|
65 |
||
66 |
spa_close(dd->dd_pool->dp_spa, dd); |
|
67 |
||
68 |
/* |
|
69 |
* The props callback list should be empty since they hold the |
|
70 |
* dir open. |
|
71 |
*/ |
|
72 |
list_destroy(&dd->dd_prop_cbs); |
|
2856 | 73 |
mutex_destroy(&dd->dd_lock); |
789 | 74 |
kmem_free(dd, sizeof (dsl_dir_t)); |
75 |
} |
|
76 |
||
1544 | 77 |
int |
789 | 78 |
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, |
1544 | 79 |
const char *tail, void *tag, dsl_dir_t **ddp) |
789 | 80 |
{ |
81 |
dmu_buf_t *dbuf; |
|
82 |
dsl_dir_t *dd; |
|
1544 | 83 |
int err; |
789 | 84 |
|
85 |
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || |
|
86 |
dsl_pool_sync_context(dp)); |
|
87 |
||
1544 | 88 |
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); |
89 |
if (err) |
|
90 |
return (err); |
|
789 | 91 |
dd = dmu_buf_get_user(dbuf); |
92 |
#ifdef ZFS_DEBUG |
|
93 |
{ |
|
94 |
dmu_object_info_t doi; |
|
95 |
dmu_object_info_from_db(dbuf, &doi); |
|
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
96 |
ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); |
789 | 97 |
} |
98 |
#endif |
|
99 |
/* XXX assert bonus buffer size is correct */ |
|
100 |
if (dd == NULL) { |
|
101 |
dsl_dir_t *winner; |
|
102 |
int err; |
|
103 |
||
104 |
dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); |
|
105 |
dd->dd_object = ddobj; |
|
106 |
dd->dd_dbuf = dbuf; |
|
107 |
dd->dd_pool = dp; |
|
108 |
dd->dd_phys = dbuf->db_data; |
|
109 |
dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; |
|
2856 | 110 |
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); |
789 | 111 |
|
112 |
list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), |
|
113 |
offsetof(dsl_prop_cb_record_t, cbr_node)); |
|
114 |
||
115 |
if (dd->dd_phys->dd_parent_obj) { |
|
1544 | 116 |
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, |
117 |
NULL, dd, &dd->dd_parent); |
|
118 |
if (err) { |
|
2856 | 119 |
mutex_destroy(&dd->dd_lock); |
1544 | 120 |
kmem_free(dd, sizeof (dsl_dir_t)); |
121 |
dmu_buf_rele(dbuf, tag); |
|
122 |
return (err); |
|
123 |
} |
|
789 | 124 |
if (tail) { |
125 |
#ifdef ZFS_DEBUG |
|
126 |
uint64_t foundobj; |
|
127 |
||
128 |
err = zap_lookup(dp->dp_meta_objset, |
|
4577 | 129 |
dd->dd_parent->dd_phys->dd_child_dir_zapobj, |
789 | 130 |
tail, sizeof (foundobj), 1, &foundobj); |
1544 | 131 |
ASSERT(err || foundobj == ddobj); |
789 | 132 |
#endif |
133 |
(void) strcpy(dd->dd_myname, tail); |
|
134 |
} else { |
|
135 |
err = zap_value_search(dp->dp_meta_objset, |
|
4577 | 136 |
dd->dd_parent->dd_phys->dd_child_dir_zapobj, |
137 |
ddobj, 0, dd->dd_myname); |
|
1544 | 138 |
} |
139 |
if (err) { |
|
140 |
dsl_dir_close(dd->dd_parent, dd); |
|
2856 | 141 |
mutex_destroy(&dd->dd_lock); |
1544 | 142 |
kmem_free(dd, sizeof (dsl_dir_t)); |
143 |
dmu_buf_rele(dbuf, tag); |
|
144 |
return (err); |
|
789 | 145 |
} |
146 |
} else { |
|
147 |
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); |
|
148 |
} |
|
149 |
||
150 |
winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, |
|
151 |
dsl_dir_evict); |
|
152 |
if (winner) { |
|
153 |
if (dd->dd_parent) |
|
154 |
dsl_dir_close(dd->dd_parent, dd); |
|
2856 | 155 |
mutex_destroy(&dd->dd_lock); |
789 | 156 |
kmem_free(dd, sizeof (dsl_dir_t)); |
157 |
dd = winner; |
|
158 |
} else { |
|
159 |
spa_open_ref(dp->dp_spa, dd); |
|
160 |
} |
|
161 |
} |
|
162 |
||
163 |
/* |
|
164 |
* The dsl_dir_t has both open-to-close and instantiate-to-evict |
|
165 |
* holds on the spa. We need the open-to-close holds because |
|
166 |
* otherwise the spa_refcnt wouldn't change when we open a |
|
167 |
* dir which the spa also has open, so we could incorrectly |
|
168 |
* think it was OK to unload/export/destroy the pool. We need |
|
169 |
* the instantiate-to-evict hold because the dsl_dir_t has a |
|
170 |
* pointer to the dd_pool, which has a pointer to the spa_t. |
|
171 |
*/ |
|
172 |
spa_open_ref(dp->dp_spa, tag); |
|
173 |
ASSERT3P(dd->dd_pool, ==, dp); |
|
174 |
ASSERT3U(dd->dd_object, ==, ddobj); |
|
175 |
ASSERT3P(dd->dd_dbuf, ==, dbuf); |
|
1544 | 176 |
*ddp = dd; |
177 |
return (0); |
|
789 | 178 |
} |
179 |
||
180 |
void |
|
181 |
dsl_dir_close(dsl_dir_t *dd, void *tag) |
|
182 |
{ |
|
183 |
dprintf_dd(dd, "%s\n", ""); |
|
184 |
spa_close(dd->dd_pool->dp_spa, tag); |
|
1544 | 185 |
dmu_buf_rele(dd->dd_dbuf, tag); |
789 | 186 |
} |
187 |
||
2467
9829873580a5
6443585 zpool create of poolname > 250 and < 256 characters panics in debug printout
ek110237
parents:
2206
diff
changeset
|
188 |
/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ |
789 | 189 |
void |
190 |
dsl_dir_name(dsl_dir_t *dd, char *buf) |
|
191 |
{ |
|
192 |
if (dd->dd_parent) { |
|
193 |
dsl_dir_name(dd->dd_parent, buf); |
|
194 |
(void) strcat(buf, "/"); |
|
195 |
} else { |
|
196 |
buf[0] = '\0'; |
|
197 |
} |
|
198 |
if (!MUTEX_HELD(&dd->dd_lock)) { |
|
199 |
/* |
|
200 |
* recursive mutex so that we can use |
|
201 |
* dprintf_dd() with dd_lock held |
|
202 |
*/ |
|
203 |
mutex_enter(&dd->dd_lock); |
|
204 |
(void) strcat(buf, dd->dd_myname); |
|
205 |
mutex_exit(&dd->dd_lock); |
|
206 |
} else { |
|
207 |
(void) strcat(buf, dd->dd_myname); |
|
208 |
} |
|
209 |
} |
|
210 |
||
3978
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
211 |
/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */ |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
212 |
int |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
213 |
dsl_dir_namelen(dsl_dir_t *dd) |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
214 |
{ |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
215 |
int result = 0; |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
216 |
|
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
217 |
if (dd->dd_parent) { |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
218 |
/* parent's name + 1 for the "/" */ |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
219 |
result = dsl_dir_namelen(dd->dd_parent) + 1; |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
220 |
} |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
221 |
|
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
222 |
if (!MUTEX_HELD(&dd->dd_lock)) { |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
223 |
/* see dsl_dir_name */ |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
224 |
mutex_enter(&dd->dd_lock); |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
225 |
result += strlen(dd->dd_myname); |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
226 |
mutex_exit(&dd->dd_lock); |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
227 |
} else { |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
228 |
result += strlen(dd->dd_myname); |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
229 |
} |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
230 |
|
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
231 |
return (result); |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
232 |
} |
2dd668007b7a
6533813 recursive snapshotting resulted in a bad stack overflow
mmusante
parents:
2885
diff
changeset
|
233 |
|
789 | 234 |
int |
235 |
dsl_dir_is_private(dsl_dir_t *dd) |
|
236 |
{ |
|
237 |
int rv = FALSE; |
|
238 |
||
239 |
if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) |
|
240 |
rv = TRUE; |
|
241 |
if (dataset_name_hidden(dd->dd_myname)) |
|
242 |
rv = TRUE; |
|
243 |
return (rv); |
|
244 |
} |
|
245 |
||
246 |
||
247 |
/*
 * Copy the leading component of path (everything before the first '/' or
 * '@') into component, and point *nextp at what remains.
 *
 *   "a/b..."  -> component "a", *nextp -> "b..."  (past the slash)
 *   "a@s"     -> component "a", *nextp -> "@s"    (at the '@')
 *   "a"       -> component "a", *nextp = NULL
 *   "@s"      -> component "@s", *nextp = NULL
 *
 * Returns 0 on success, ENOENT if path is NULL, EINVAL for a malformed
 * path, or ENAMETOOLONG if the component would not fit in MAXNAMELEN
 * bytes including its terminator.  *nextp is written only on success.
 */
static int
getcomponent(const char *path, char *component, const char **nextp)
{
	char *sep;
	size_t len;

	if (path == NULL)
		return (ENOENT);

	/* This would be a good place to reserve some namespace... */
	sep = strpbrk(path, "/@");

	if (sep == NULL) {
		/* No separator: the whole path is the component. */
		if (strlen(path) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(component, path);
		*nextp = NULL;
		return (0);
	}

	/* two separators in a row */
	if (sep[1] == '/' || sep[1] == '@')
		return (EINVAL);

	if (sep == path) {
		/*
		 * if the first thing is an @ or /, it had better be an
		 * @ and it had better not have any more ats or slashes,
		 * and it had better have something after the @.
		 */
		if (sep[0] != '@' || strpbrk(path + 1, "/@") != NULL ||
		    sep[1] == '\0')
			return (EINVAL);
		if (strlen(path) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(component, path);
		*nextp = NULL;
		return (0);
	}

	/*
	 * if the next separator is an @, there better not be
	 * any more slashes.
	 */
	if (sep[0] == '@' && strchr(path, '/') != NULL)
		return (EINVAL);

	len = sep - path;
	if (len >= MAXNAMELEN)
		return (ENAMETOOLONG);
	(void) strncpy(component, path, len);
	component[len] = '\0';

	if (sep[0] == '/') {
		/* continue after the slash */
		*nextp = sep + 1;
	} else if (sep[0] == '@') {
		/* leave the '@' in place so the caller sees a snapshot */
		*nextp = sep;
	} else {
		ASSERT(!"invalid p");
		*nextp = sep;
	}
	return (0);
}
|
295 |
||
296 |
/* |
|
297 |
* same as dsl_open_dir, ignore the first component of name and use the |
|
298 |
* spa instead |
|
299 |
*/ |
|
1544 | 300 |
int |
301 |
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, |
|
302 |
dsl_dir_t **ddp, const char **tailp) |
|
789 | 303 |
{ |
304 |
char buf[MAXNAMELEN]; |
|
305 |
const char *next, *nextnext = NULL; |
|
306 |
int err; |
|
307 |
dsl_dir_t *dd; |
|
308 |
dsl_pool_t *dp; |
|
309 |
uint64_t ddobj; |
|
310 |
int openedspa = FALSE; |
|
311 |
||
312 |
dprintf("%s\n", name); |
|
313 |
||
314 |
err = getcomponent(name, buf, &next); |
|
315 |
if (err) |
|
1544 | 316 |
return (err); |
789 | 317 |
if (spa == NULL) { |
318 |
err = spa_open(buf, &spa, FTAG); |
|
319 |
if (err) { |
|
320 |
dprintf("spa_open(%s) failed\n", buf); |
|
1544 | 321 |
return (err); |
789 | 322 |
} |
323 |
openedspa = TRUE; |
|
324 |
||
325 |
/* XXX this assertion belongs in spa_open */ |
|
326 |
ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); |
|
327 |
} |
|
328 |
||
329 |
dp = spa_get_dsl(spa); |
|
330 |
||
331 |
rw_enter(&dp->dp_config_rwlock, RW_READER); |
|
1544 | 332 |
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); |
333 |
if (err) { |
|
334 |
rw_exit(&dp->dp_config_rwlock); |
|
335 |
if (openedspa) |
|
336 |
spa_close(spa, FTAG); |
|
337 |
return (err); |
|
338 |
} |
|
339 |
||
789 | 340 |
while (next != NULL) { |
341 |
dsl_dir_t *child_ds; |
|
342 |
err = getcomponent(next, buf, &nextnext); |
|
1544 | 343 |
if (err) |
344 |
break; |
|
789 | 345 |
ASSERT(next[0] != '\0'); |
346 |
if (next[0] == '@') |
|
347 |
break; |
|
348 |
dprintf("looking up %s in obj%lld\n", |
|
349 |
buf, dd->dd_phys->dd_child_dir_zapobj); |
|
350 |
||
351 |
err = zap_lookup(dp->dp_meta_objset, |
|
352 |
dd->dd_phys->dd_child_dir_zapobj, |
|
353 |
buf, sizeof (ddobj), 1, &ddobj); |
|
1544 | 354 |
if (err) { |
355 |
if (err == ENOENT) |
|
356 |
err = 0; |
|
789 | 357 |
break; |
358 |
} |
|
359 |
||
1544 | 360 |
err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); |
361 |
if (err) |
|
362 |
break; |
|
789 | 363 |
dsl_dir_close(dd, tag); |
364 |
dd = child_ds; |
|
365 |
next = nextnext; |
|
366 |
} |
|
367 |
rw_exit(&dp->dp_config_rwlock); |
|
368 |
||
1544 | 369 |
if (err) { |
370 |
dsl_dir_close(dd, tag); |
|
371 |
if (openedspa) |
|
372 |
spa_close(spa, FTAG); |
|
373 |
return (err); |
|
374 |
} |
|
375 |
||
789 | 376 |
/* |
377 |
* It's an error if there's more than one component left, or |
|
378 |
* tailp==NULL and there's any component left. |
|
379 |
*/ |
|
380 |
if (next != NULL && |
|
381 |
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) { |
|
382 |
/* bad path name */ |
|
383 |
dsl_dir_close(dd, tag); |
|
384 |
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); |
|
1544 | 385 |
err = ENOENT; |
789 | 386 |
} |
387 |
if (tailp) |
|
388 |
*tailp = next; |
|
389 |
if (openedspa) |
|
390 |
spa_close(spa, FTAG); |
|
1544 | 391 |
*ddp = dd; |
392 |
return (err); |
|
789 | 393 |
} |
394 |
||
395 |
/*
 * Open the dsl_dir_t named by name, opening the pool given by the first
 * component of name for the duration of the lookup.  This is
 * dsl_dir_open_spa() with a NULL spa.
 *
 * On success returns 0 with the directory (held with tag) in *ddp, and
 * possibly the last component which couldn't be found in *tailp.
 * Returns a non-zero error code if the path is bogus, or if
 * tailp==NULL and we couldn't parse the whole name.  (*tailp)[0] == '@'
 * means that the last component is a snapshot.
 */
int
dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
{
	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
}
406 |
||
2199 | 407 |
uint64_t |
789 | 408 |
dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) |
409 |
{ |
|
410 |
objset_t *mos = pds->dd_pool->dp_meta_objset; |
|
411 |
uint64_t ddobj; |
|
412 |
dsl_dir_phys_t *dsphys; |
|
413 |
dmu_buf_t *dbuf; |
|
414 |
||
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
415 |
ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
416 |
DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); |
2199 | 417 |
VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, |
418 |
name, sizeof (uint64_t), 1, &ddobj, tx)); |
|
1544 | 419 |
VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); |
789 | 420 |
dmu_buf_will_dirty(dbuf, tx); |
421 |
dsphys = dbuf->db_data; |
|
422 |
||
423 |
dsphys->dd_creation_time = gethrestime_sec(); |
|
424 |
dsphys->dd_parent_obj = pds->dd_object; |
|
425 |
dsphys->dd_props_zapobj = zap_create(mos, |
|
426 |
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); |
|
427 |
dsphys->dd_child_dir_zapobj = zap_create(mos, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
428 |
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); |
1544 | 429 |
dmu_buf_rele(dbuf, FTAG); |
789 | 430 |
|
2199 | 431 |
return (ddobj); |
432 |
} |
|
433 |
||
434 |
/* ARGSUSED */ |
|
435 |
int |
|
436 |
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) |
|
437 |
{ |
|
438 |
dsl_dir_t *dd = arg1; |
|
439 |
dsl_pool_t *dp = dd->dd_pool; |
|
440 |
objset_t *mos = dp->dp_meta_objset; |
|
441 |
int err; |
|
442 |
uint64_t count; |
|
443 |
||
444 |
/* |
|
445 |
* There should be exactly two holds, both from |
|
446 |
* dsl_dataset_destroy: one on the dd directory, and one on its |
|
447 |
* head ds. Otherwise, someone is trying to lookup something |
|
448 |
* inside this dir while we want to destroy it. The |
|
449 |
* config_rwlock ensures that nobody else opens it after we |
|
450 |
* check. |
|
451 |
*/ |
|
452 |
if (dmu_buf_refcount(dd->dd_dbuf) > 2) |
|
453 |
return (EBUSY); |
|
454 |
||
455 |
err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); |
|
456 |
if (err) |
|
457 |
return (err); |
|
458 |
if (count != 0) |
|
459 |
return (EEXIST); |
|
789 | 460 |
|
461 |
return (0); |
|
462 |
} |
|
463 |
||
2199 | 464 |
void |
4543 | 465 |
dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) |
789 | 466 |
{ |
2199 | 467 |
dsl_dir_t *dd = arg1; |
468 |
objset_t *mos = dd->dd_pool->dp_meta_objset; |
|
469 |
uint64_t val, obj; |
|
789 | 470 |
|
2199 | 471 |
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); |
789 | 472 |
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); |
473 |
||
2199 | 474 |
/* Remove our reservation. */ |
789 | 475 |
val = 0; |
4543 | 476 |
dsl_dir_set_reservation_sync(dd, &val, cr, tx); |
789 | 477 |
ASSERT3U(dd->dd_used_bytes, ==, 0); |
478 |
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); |
|
479 |
||
2199 | 480 |
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); |
481 |
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); |
|
4543 | 482 |
VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); |
2199 | 483 |
VERIFY(0 == zap_remove(mos, |
484 |
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); |
|
789 | 485 |
|
2199 | 486 |
obj = dd->dd_object; |
487 |
dsl_dir_close(dd, tag); |
|
488 |
VERIFY(0 == dmu_object_free(mos, obj, tx)); |
|
789 | 489 |
} |
490 |
||
491 |
void |
|
492 |
dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) |
|
493 |
{ |
|
494 |
dsl_dir_phys_t *dsp; |
|
495 |
dmu_buf_t *dbuf; |
|
496 |
int error; |
|
497 |
||
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
498 |
*ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
499 |
DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); |
789 | 500 |
|
501 |
error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, |
|
502 |
sizeof (uint64_t), 1, ddobjp, tx); |
|
503 |
ASSERT3U(error, ==, 0); |
|
504 |
||
1544 | 505 |
VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf)); |
789 | 506 |
dmu_buf_will_dirty(dbuf, tx); |
507 |
dsp = dbuf->db_data; |
|
508 |
||
509 |
dsp->dd_creation_time = gethrestime_sec(); |
|
510 |
dsp->dd_props_zapobj = zap_create(mos, |
|
511 |
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); |
|
512 |
dsp->dd_child_dir_zapobj = zap_create(mos, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
513 |
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); |
789 | 514 |
|
1544 | 515 |
dmu_buf_rele(dbuf, FTAG); |
789 | 516 |
} |
517 |
||
518 |
void |
|
2885 | 519 |
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) |
789 | 520 |
{ |
2885 | 521 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, |
522 |
dsl_dir_space_available(dd, NULL, 0, TRUE)); |
|
789 | 523 |
|
524 |
mutex_enter(&dd->dd_lock); |
|
2885 | 525 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes); |
526 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, |
|
527 |
dd->dd_phys->dd_quota); |
|
528 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, |
|
529 |
dd->dd_phys->dd_reserved); |
|
530 |
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, |
|
531 |
dd->dd_phys->dd_compressed_bytes == 0 ? 100 : |
|
532 |
(dd->dd_phys->dd_uncompressed_bytes * 100 / |
|
533 |
dd->dd_phys->dd_compressed_bytes)); |
|
789 | 534 |
mutex_exit(&dd->dd_lock); |
535 |
||
5367 | 536 |
if (dd->dd_phys->dd_origin_obj) { |
789 | 537 |
dsl_dataset_t *ds; |
2885 | 538 |
char buf[MAXNAMELEN]; |
789 | 539 |
|
540 |
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); |
|
1544 | 541 |
VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
5367 | 542 |
dd->dd_phys->dd_origin_obj, |
1544 | 543 |
NULL, DS_MODE_NONE, FTAG, &ds)); |
2885 | 544 |
dsl_dataset_name(ds, buf); |
789 | 545 |
dsl_dataset_close(ds, DS_MODE_NONE, FTAG); |
546 |
rw_exit(&dd->dd_pool->dp_config_rwlock); |
|
2885 | 547 |
|
548 |
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); |
|
789 | 549 |
} |
550 |
} |
|
551 |
||
552 |
void |
|
553 |
dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) |
|
554 |
{ |
|
555 |
dsl_pool_t *dp = dd->dd_pool; |
|
556 |
||
557 |
ASSERT(dd->dd_phys); |
|
558 |
||
559 |
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { |
|
560 |
/* up the hold count until we can be written out */ |
|
561 |
dmu_buf_add_ref(dd->dd_dbuf, dd); |
|
562 |
} |
|
563 |
} |
|
564 |
||
565 |
static int64_t |
|
566 |
parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) |
|
567 |
{ |
|
568 |
uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); |
|
569 |
uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); |
|
570 |
return (new_accounted - old_accounted); |
|
571 |
} |
|
572 |
||
573 |
void |
|
574 |
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) |
|
575 |
{ |
|
576 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
577 |
||
578 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
579 |
||
580 |
mutex_enter(&dd->dd_lock); |
|
581 |
ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); |
|
582 |
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, |
|
583 |
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); |
|
584 |
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; |
|
585 |
dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; |
|
586 |
mutex_exit(&dd->dd_lock); |
|
587 |
||
588 |
/* release the hold from dsl_dir_dirty */ |
|
1544 | 589 |
dmu_buf_rele(dd->dd_dbuf, dd); |
789 | 590 |
} |
591 |
||
592 |
static uint64_t |
|
593 |
dsl_dir_estimated_space(dsl_dir_t *dd) |
|
594 |
{ |
|
595 |
int64_t space; |
|
596 |
int i; |
|
597 |
||
598 |
ASSERT(MUTEX_HELD(&dd->dd_lock)); |
|
599 |
||
1544 | 600 |
space = dd->dd_phys->dd_used_bytes; |
789 | 601 |
ASSERT(space >= 0); |
602 |
for (i = 0; i < TXG_SIZE; i++) { |
|
603 |
space += dd->dd_space_towrite[i&TXG_MASK]; |
|
604 |
ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); |
|
605 |
} |
|
606 |
return (space); |
|
607 |
} |
|
608 |
||
609 |
/* |
|
610 |
* How much space would dd have available if ancestor had delta applied |
|
611 |
* to it? If ondiskonly is set, we're only interested in what's |
|
612 |
* on-disk, not estimated pending changes. |
|
613 |
*/ |
|
2885 | 614 |
uint64_t |
789 | 615 |
dsl_dir_space_available(dsl_dir_t *dd, |
616 |
dsl_dir_t *ancestor, int64_t delta, int ondiskonly) |
|
617 |
{ |
|
618 |
uint64_t parentspace, myspace, quota, used; |
|
619 |
||
620 |
/* |
|
621 |
* If there are no restrictions otherwise, assume we have |
|
622 |
* unlimited space available. |
|
623 |
*/ |
|
624 |
quota = UINT64_MAX; |
|
625 |
parentspace = UINT64_MAX; |
|
626 |
||
627 |
if (dd->dd_parent != NULL) { |
|
628 |
parentspace = dsl_dir_space_available(dd->dd_parent, |
|
629 |
ancestor, delta, ondiskonly); |
|
630 |
} |
|
631 |
||
632 |
mutex_enter(&dd->dd_lock); |
|
633 |
if (dd->dd_phys->dd_quota != 0) |
|
634 |
quota = dd->dd_phys->dd_quota; |
|
635 |
if (ondiskonly) { |
|
636 |
used = dd->dd_used_bytes; |
|
637 |
} else { |
|
638 |
used = dsl_dir_estimated_space(dd); |
|
639 |
} |
|
640 |
if (dd == ancestor) |
|
641 |
used += delta; |
|
642 |
||
643 |
if (dd->dd_parent == NULL) { |
|
2082 | 644 |
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); |
789 | 645 |
quota = MIN(quota, poolsize); |
646 |
} |
|
647 |
||
648 |
if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { |
|
649 |
/* |
|
650 |
* We have some space reserved, in addition to what our |
|
651 |
* parent gave us. |
|
652 |
*/ |
|
653 |
parentspace += dd->dd_phys->dd_reserved - used; |
|
654 |
} |
|
655 |
||
656 |
if (used > quota) { |
|
657 |
/* over quota */ |
|
658 |
myspace = 0; |
|
2082 | 659 |
|
660 |
/* |
|
661 |
* While it's OK to be a little over quota, if |
|
662 |
* we think we are using more space than there |
|
663 |
* is in the pool (which is already 1.6% more than |
|
664 |
* dsl_pool_adjustedsize()), something is very |
|
665 |
* wrong. |
|
666 |
*/ |
|
667 |
ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa)); |
|
789 | 668 |
} else { |
669 |
/* |
|
2082 | 670 |
* the lesser of the space provided by our parent and |
671 |
* the space left in our quota |
|
789 | 672 |
*/ |
673 |
myspace = MIN(parentspace, quota - used); |
|
674 |
} |
|
675 |
||
676 |
mutex_exit(&dd->dd_lock); |
|
677 |
||
678 |
return (myspace); |
|
679 |
} |
|
680 |
||
681 |
/*
 * One entry in the list handed back as the cookie from
 * dsl_dir_tempreserve_space(); each entry records a reservation that
 * dsl_dir_tempreserve_clear() must undo.
 */
struct tempreserve {
	list_node_t tr_node;	/* linkage in the reservation list */
	dsl_dir_t *tr_ds;	/* dir reserved against; NULL for the ARC */
	uint64_t tr_size;	/* number of bytes reserved */
};
|
686 |
||
687 |
/* |
|
688 |
* Reserve space in this dsl_dir, to be used in this tx's txg. |
|
689 |
* After the space has been dirtied (and thus |
|
690 |
* dsl_dir_willuse_space() has been called), the reservation should |
|
691 |
* be canceled, using dsl_dir_tempreserve_clear(). |
|
692 |
*/ |
|
693 |
static int |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
694 |
dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
695 |
boolean_t netfree, boolean_t noquota, list_t *tr_list, dmu_tx_t *tx) |
789 | 696 |
{ |
697 |
uint64_t txg = tx->tx_txg; |
|
698 |
uint64_t est_used, quota, parent_rsrv; |
|
699 |
int edquot = EDQUOT; |
|
700 |
int txgidx = txg & TXG_MASK; |
|
701 |
int i; |
|
702 |
struct tempreserve *tr; |
|
703 |
||
704 |
ASSERT3U(txg, !=, 0); |
|
1544 | 705 |
ASSERT3S(asize, >=, 0); |
789 | 706 |
|
707 |
mutex_enter(&dd->dd_lock); |
|
708 |
/* |
|
709 |
* Check against the dsl_dir's quota. We don't add in the delta |
|
710 |
* when checking for over-quota because they get one free hit. |
|
711 |
*/ |
|
712 |
est_used = dsl_dir_estimated_space(dd); |
|
713 |
for (i = 0; i < TXG_SIZE; i++) |
|
714 |
est_used += dd->dd_tempreserved[i]; |
|
715 |
||
4709
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
716 |
/* |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
717 |
* If this transaction will result in a net free of space, we want |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
718 |
* to let it through. |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
719 |
*/ |
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
720 |
if (netfree || noquota || dd->dd_phys->dd_quota == 0) |
4709
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
721 |
quota = UINT64_MAX; |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
722 |
else |
789 | 723 |
quota = dd->dd_phys->dd_quota; |
724 |
||
725 |
/* |
|
4709
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
726 |
* Adjust the quota against the actual pool size at the root. |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
727 |
* To ensure that it's possible to remove files from a full |
dc10a713d1a0
6453407 rm a file when the root file system is at its quota limit reports ENOSPC
maybee
parents:
4577
diff
changeset
|
728 |
* pool without inducing transient overcommits, we throttle |
789 | 729 |
* netfree transactions against a quota that is slightly larger, |
730 |
* but still within the pool's allocation slop. In cases where |
|
731 |
* we're very close to full, this will allow a steady trickle of |
|
732 |
* removes to get through. |
|
733 |
*/ |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
734 |
if (dd->dd_parent == NULL) { |
789 | 735 |
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); |
736 |
if (poolsize < quota) { |
|
737 |
quota = poolsize; |
|
738 |
edquot = ENOSPC; |
|
739 |
} |
|
740 |
} |
|
741 |
||
742 |
/* |
|
743 |
* If they are requesting more space, and our current estimate |
|
744 |
* is over quota. They get to try again unless the actual |
|
1544 | 745 |
* on-disk is over quota and there are no pending changes (which |
746 |
* may free up space for us). |
|
789 | 747 |
*/ |
748 |
if (asize > 0 && est_used > quota) { |
|
1544 | 749 |
if (dd->dd_space_towrite[txg & TXG_MASK] != 0 || |
750 |
dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 || |
|
751 |
dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 || |
|
752 |
dd->dd_used_bytes < quota) |
|
789 | 753 |
edquot = ERESTART; |
754 |
dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " |
|
755 |
"quota=%lluK tr=%lluK err=%d\n", |
|
756 |
dd->dd_used_bytes>>10, est_used>>10, |
|
757 |
quota>>10, asize>>10, edquot); |
|
758 |
mutex_exit(&dd->dd_lock); |
|
759 |
return (edquot); |
|
760 |
} |
|
761 |
||
762 |
/* We need to up our estimated delta before dropping dd_lock */ |
|
763 |
dd->dd_tempreserved[txgidx] += asize; |
|
764 |
||
765 |
parent_rsrv = parent_delta(dd, est_used, asize); |
|
766 |
mutex_exit(&dd->dd_lock); |
|
767 |
||
768 |
tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); |
|
769 |
tr->tr_ds = dd; |
|
770 |
tr->tr_size = asize; |
|
771 |
list_insert_tail(tr_list, tr); |
|
772 |
||
773 |
/* see if it's OK with our parent */ |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
774 |
if (dd->dd_parent && parent_rsrv) { |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
775 |
boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
776 |
|
789 | 777 |
return (dsl_dir_tempreserve_impl(dd->dd_parent, |
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
778 |
parent_rsrv, netfree, ismos, tr_list, tx)); |
789 | 779 |
} else { |
780 |
return (0); |
|
781 |
} |
|
782 |
} |
|
783 |
||
784 |
/* |
|
785 |
* Reserve space in this dsl_dir, to be used in this tx's txg. |
|
786 |
* After the space has been dirtied (and thus |
|
787 |
* dsl_dir_willuse_space() has been called), the reservation should |
|
788 |
* be canceled, using dsl_dir_tempreserve_clear(). |
|
789 |
*/ |
|
790 |
int |
|
791 |
dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, |
|
792 |
uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) |
|
793 |
{ |
|
794 |
int err = 0; |
|
795 |
list_t *tr_list; |
|
796 |
||
797 |
tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); |
|
798 |
list_create(tr_list, sizeof (struct tempreserve), |
|
799 |
offsetof(struct tempreserve, tr_node)); |
|
1544 | 800 |
ASSERT3S(asize, >=, 0); |
801 |
ASSERT3S(fsize, >=, 0); |
|
789 | 802 |
|
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4709
diff
changeset
|
803 |
err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, FALSE, |
789 | 804 |
tr_list, tx); |
805 |
||
806 |
if (err == 0) { |
|
807 |
struct tempreserve *tr; |
|
808 |
||
809 |
err = arc_tempreserve_space(lsize); |
|
810 |
if (err == 0) { |
|
811 |
tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); |
|
812 |
tr->tr_ds = NULL; |
|
813 |
tr->tr_size = lsize; |
|
814 |
list_insert_tail(tr_list, tr); |
|
815 |
} |
|
816 |
} |
|
817 |
||
818 |
if (err) |
|
819 |
dsl_dir_tempreserve_clear(tr_list, tx); |
|
820 |
else |
|
821 |
*tr_cookiep = tr_list; |
|
822 |
return (err); |
|
823 |
} |
|
824 |
||
825 |
/* |
|
826 |
* Clear a temporary reservation that we previously made with |
|
827 |
* dsl_dir_tempreserve_space(). |
|
828 |
*/ |
|
829 |
void |
|
830 |
dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) |
|
831 |
{ |
|
832 |
int txgidx = tx->tx_txg & TXG_MASK; |
|
833 |
list_t *tr_list = tr_cookie; |
|
834 |
struct tempreserve *tr; |
|
835 |
||
836 |
ASSERT3U(tx->tx_txg, !=, 0); |
|
837 |
||
838 |
while (tr = list_head(tr_list)) { |
|
839 |
if (tr->tr_ds == NULL) { |
|
840 |
arc_tempreserve_clear(tr->tr_size); |
|
841 |
} else { |
|
842 |
mutex_enter(&tr->tr_ds->dd_lock); |
|
843 |
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, |
|
844 |
tr->tr_size); |
|
845 |
tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; |
|
846 |
mutex_exit(&tr->tr_ds->dd_lock); |
|
847 |
} |
|
848 |
list_remove(tr_list, tr); |
|
849 |
kmem_free(tr, sizeof (struct tempreserve)); |
|
850 |
} |
|
851 |
||
852 |
kmem_free(tr_list, sizeof (list_t)); |
|
853 |
} |
|
854 |
||
855 |
/* |
|
856 |
* Call in open context when we think we're going to write/free space, |
|
857 |
* eg. when dirtying data. Be conservative (ie. OK to write less than |
|
858 |
* this or free more than this, but don't write more or free less). |
|
859 |
*/ |
|
860 |
void |
|
861 |
dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) |
|
862 |
{ |
|
863 |
int64_t parent_space; |
|
864 |
uint64_t est_used; |
|
865 |
||
866 |
mutex_enter(&dd->dd_lock); |
|
867 |
if (space > 0) |
|
868 |
dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; |
|
869 |
||
870 |
est_used = dsl_dir_estimated_space(dd); |
|
871 |
parent_space = parent_delta(dd, est_used, space); |
|
872 |
mutex_exit(&dd->dd_lock); |
|
873 |
||
874 |
/* Make sure that we clean up dd_space_to* */ |
|
875 |
dsl_dir_dirty(dd, tx); |
|
876 |
||
877 |
/* XXX this is potentially expensive and unnecessary... */ |
|
878 |
if (parent_space && dd->dd_parent) |
|
879 |
dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); |
|
880 |
} |
|
881 |
||
882 |
/* call from syncing context when we actually write/free space for this dd */ |
|
883 |
void |
|
884 |
dsl_dir_diduse_space(dsl_dir_t *dd, |
|
885 |
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) |
|
886 |
{ |
|
887 |
int64_t accounted_delta; |
|
888 |
||
889 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
890 |
||
891 |
dsl_dir_dirty(dd, tx); |
|
892 |
||
893 |
mutex_enter(&dd->dd_lock); |
|
894 |
accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); |
|
895 |
ASSERT(used >= 0 || dd->dd_used_bytes >= -used); |
|
896 |
ASSERT(compressed >= 0 || |
|
897 |
dd->dd_phys->dd_compressed_bytes >= -compressed); |
|
898 |
ASSERT(uncompressed >= 0 || |
|
899 |
dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); |
|
900 |
dd->dd_used_bytes += used; |
|
901 |
dd->dd_phys->dd_uncompressed_bytes += uncompressed; |
|
902 |
dd->dd_phys->dd_compressed_bytes += compressed; |
|
903 |
mutex_exit(&dd->dd_lock); |
|
904 |
||
905 |
if (dd->dd_parent != NULL) { |
|
906 |
dsl_dir_diduse_space(dd->dd_parent, |
|
907 |
accounted_delta, compressed, uncompressed, tx); |
|
908 |
} |
|
909 |
} |
|
910 |
||
911 |
static int |
|
2199 | 912 |
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) |
789 | 913 |
{ |
2199 | 914 |
dsl_dir_t *dd = arg1; |
915 |
uint64_t *quotap = arg2; |
|
789 | 916 |
uint64_t new_quota = *quotap; |
917 |
int err = 0; |
|
2199 | 918 |
uint64_t towrite; |
919 |
||
920 |
if (new_quota == 0) |
|
921 |
return (0); |
|
922 |
||
923 |
mutex_enter(&dd->dd_lock); |
|
924 |
/* |
|
925 |
* If we are doing the preliminary check in open context, and |
|
926 |
* there are pending changes, then don't fail it, since the |
|
927 |
* pending changes could under-estimat the amount of space to be |
|
928 |
* freed up. |
|
929 |
*/ |
|
930 |
towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] + |
|
931 |
dd->dd_space_towrite[2] + dd->dd_space_towrite[3]; |
|
932 |
if ((dmu_tx_is_syncing(tx) || towrite == 0) && |
|
933 |
(new_quota < dd->dd_phys->dd_reserved || |
|
934 |
new_quota < dsl_dir_estimated_space(dd))) { |
|
935 |
err = ENOSPC; |
|
936 |
} |
|
937 |
mutex_exit(&dd->dd_lock); |
|
938 |
return (err); |
|
939 |
} |
|
940 |
||
4543 | 941 |
/* ARGSUSED */ |
2199 | 942 |
static void |
4543 | 943 |
dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
2199 | 944 |
{ |
945 |
dsl_dir_t *dd = arg1; |
|
946 |
uint64_t *quotap = arg2; |
|
947 |
uint64_t new_quota = *quotap; |
|
789 | 948 |
|
949 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
950 |
||
951 |
mutex_enter(&dd->dd_lock); |
|
2199 | 952 |
dd->dd_phys->dd_quota = new_quota; |
789 | 953 |
mutex_exit(&dd->dd_lock); |
4543 | 954 |
|
955 |
spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa, |
|
956 |
tx, cr, "%lld dataset = %llu ", |
|
957 |
(longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj); |
|
789 | 958 |
} |
959 |
||
960 |
int |
|
961 |
dsl_dir_set_quota(const char *ddname, uint64_t quota) |
|
962 |
{ |
|
963 |
dsl_dir_t *dd; |
|
964 |
int err; |
|
965 |
||
1544 | 966 |
err = dsl_dir_open(ddname, FTAG, &dd, NULL); |
967 |
if (err) |
|
968 |
return (err); |
|
789 | 969 |
/* |
970 |
* If someone removes a file, then tries to set the quota, we |
|
971 |
* want to make sure the file freeing takes effect. |
|
972 |
*/ |
|
973 |
txg_wait_open(dd->dd_pool, 0); |
|
974 |
||
2199 | 975 |
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, |
976 |
dsl_dir_set_quota_sync, dd, "a, 0); |
|
789 | 977 |
dsl_dir_close(dd, FTAG); |
978 |
return (err); |
|
979 |
} |
|
980 |
||
981 |
static int |
|
2199 | 982 |
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) |
789 | 983 |
{ |
2199 | 984 |
dsl_dir_t *dd = arg1; |
985 |
uint64_t *reservationp = arg2; |
|
789 | 986 |
uint64_t new_reservation = *reservationp; |
987 |
uint64_t used, avail; |
|
988 |
int64_t delta; |
|
989 |
||
990 |
if (new_reservation > INT64_MAX) |
|
991 |
return (EOVERFLOW); |
|
992 |
||
2199 | 993 |
/* |
994 |
* If we are doing the preliminary check in open context, the |
|
995 |
* space estimates may be inaccurate. |
|
996 |
*/ |
|
997 |
if (!dmu_tx_is_syncing(tx)) |
|
998 |
return (0); |
|
999 |
||
789 | 1000 |
mutex_enter(&dd->dd_lock); |
1001 |
used = dd->dd_used_bytes; |
|
1002 |
delta = MAX(used, new_reservation) - |
|
1003 |
MAX(used, dd->dd_phys->dd_reserved); |
|
1004 |
mutex_exit(&dd->dd_lock); |
|
1005 |
||
1006 |
if (dd->dd_parent) { |
|
1007 |
avail = dsl_dir_space_available(dd->dd_parent, |
|
1008 |
NULL, 0, FALSE); |
|
1009 |
} else { |
|
1010 |
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; |
|
1011 |
} |
|
1012 |
||
1013 |
if (delta > 0 && delta > avail) |
|
1014 |
return (ENOSPC); |
|
1015 |
if (delta > 0 && dd->dd_phys->dd_quota > 0 && |
|
1016 |
new_reservation > dd->dd_phys->dd_quota) |
|
1017 |
return (ENOSPC); |
|
2199 | 1018 |
return (0); |
1019 |
} |
|
1020 |
||
4543 | 1021 |
/* ARGSUSED */ |
2199 | 1022 |
static void |
4543 | 1023 |
dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
2199 | 1024 |
{ |
1025 |
dsl_dir_t *dd = arg1; |
|
1026 |
uint64_t *reservationp = arg2; |
|
1027 |
uint64_t new_reservation = *reservationp; |
|
1028 |
uint64_t used; |
|
1029 |
int64_t delta; |
|
1030 |
||
1031 |
mutex_enter(&dd->dd_lock); |
|
1032 |
used = dd->dd_used_bytes; |
|
1033 |
delta = MAX(used, new_reservation) - |
|
1034 |
MAX(used, dd->dd_phys->dd_reserved); |
|
1035 |
mutex_exit(&dd->dd_lock); |
|
789 | 1036 |
|
1037 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
1038 |
dd->dd_phys->dd_reserved = new_reservation; |
|
1039 |
||
1040 |
if (dd->dd_parent != NULL) { |
|
1041 |
/* Roll up this additional usage into our ancestors */ |
|
1042 |
dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); |
|
1043 |
} |
|
4543 | 1044 |
|
1045 |
spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa, |
|
1046 |
tx, cr, "%lld dataset = %llu", |
|
1047 |
(longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj); |
|
789 | 1048 |
} |
1049 |
||
1050 |
int |
|
1051 |
dsl_dir_set_reservation(const char *ddname, uint64_t reservation) |
|
1052 |
{ |
|
1053 |
dsl_dir_t *dd; |
|
1054 |
int err; |
|
1055 |
||
1544 | 1056 |
err = dsl_dir_open(ddname, FTAG, &dd, NULL); |
1057 |
if (err) |
|
1058 |
return (err); |
|
2199 | 1059 |
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, |
1060 |
dsl_dir_set_reservation_sync, dd, &reservation, 0); |
|
789 | 1061 |
dsl_dir_close(dd, FTAG); |
1062 |
return (err); |
|
1063 |
} |
|
1064 |
||
1065 |
/*
 * Return the first dir on ds1's parent chain (starting with ds1 itself)
 * that also appears on ds2's parent chain (starting with ds2 itself),
 * or NULL if the two chains share no dir.
 */
static dsl_dir_t *
closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
{
	dsl_dir_t *outer = ds1;

	while (outer != NULL) {
		dsl_dir_t *inner = ds2;

		while (inner != NULL) {
			if (inner == outer)
				return (inner);
			inner = inner->dd_parent;
		}
		outer = outer->dd_parent;
	}

	return (NULL);
}
|
1077 |
||
1078 |
/* |
|
1079 |
* If delta is applied to dd, how much of that delta would be applied to |
|
1080 |
* ancestor? Syncing context only. |
|
1081 |
*/ |
|
1082 |
static int64_t |
|
1083 |
would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) |
|
1084 |
{ |
|
1085 |
if (dd == ancestor) |
|
1086 |
return (delta); |
|
1087 |
||
1088 |
mutex_enter(&dd->dd_lock); |
|
1089 |
delta = parent_delta(dd, dd->dd_used_bytes, delta); |
|
1090 |
mutex_exit(&dd->dd_lock); |
|
1091 |
return (would_change(dd->dd_parent, delta, ancestor)); |
|
1092 |
} |
|
1093 |
||
2199 | 1094 |
struct renamearg { |
1095 |
dsl_dir_t *newparent; |
|
1096 |
const char *mynewname; |
|
1097 |
}; |
|
1098 |
||
4543 | 1099 |
/*ARGSUSED*/ |
2199 | 1100 |
static int |
1101 |
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) |
|
789 | 1102 |
{ |
2199 | 1103 |
dsl_dir_t *dd = arg1; |
1104 |
struct renamearg *ra = arg2; |
|
789 | 1105 |
dsl_pool_t *dp = dd->dd_pool; |
1106 |
objset_t *mos = dp->dp_meta_objset; |
|
2199 | 1107 |
int err; |
1108 |
uint64_t val; |
|
1109 |
||
1110 |
/* There should be 2 references: the open and the dirty */ |
|
1111 |
if (dmu_buf_refcount(dd->dd_dbuf) > 2) |
|
1112 |
return (EBUSY); |
|
789 | 1113 |
|
2199 | 1114 |
/* check for existing name */ |
1115 |
err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, |
|
1116 |
ra->mynewname, 8, 1, &val); |
|
1117 |
if (err == 0) |
|
1118 |
return (EEXIST); |
|
1119 |
if (err != ENOENT) |
|
1544 | 1120 |
return (err); |
789 | 1121 |
|
2199 | 1122 |
if (ra->newparent != dd->dd_parent) { |
2082 | 1123 |
/* is there enough space? */ |
1124 |
uint64_t myspace = |
|
1125 |
MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); |
|
789 | 1126 |
|
2199 | 1127 |
/* no rename into our descendant */ |
1128 |
if (closest_common_ancestor(dd, ra->newparent) == dd) |
|
789 | 1129 |
return (EINVAL); |
2199 | 1130 |
|
1131 |
if (err = dsl_dir_transfer_possible(dd->dd_parent, |
|
1132 |
ra->newparent, myspace)) |
|
1133 |
return (err); |
|
1134 |
} |
|
1135 |
||
1136 |
return (0); |
|
1137 |
} |
|
789 | 1138 |
|
2199 | 1139 |
static void |
4543 | 1140 |
dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) |
2199 | 1141 |
{ |
1142 |
dsl_dir_t *dd = arg1; |
|
1143 |
struct renamearg *ra = arg2; |
|
1144 |
dsl_pool_t *dp = dd->dd_pool; |
|
1145 |
objset_t *mos = dp->dp_meta_objset; |
|
1146 |
int err; |
|
789 | 1147 |
|
2199 | 1148 |
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); |
1149 |
||
1150 |
if (ra->newparent != dd->dd_parent) { |
|
1151 |
uint64_t myspace = |
|
1152 |
MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); |
|
789 | 1153 |
|
1154 |
dsl_dir_diduse_space(dd->dd_parent, -myspace, |
|
1155 |
-dd->dd_phys->dd_compressed_bytes, |
|
1156 |
-dd->dd_phys->dd_uncompressed_bytes, tx); |
|
2199 | 1157 |
dsl_dir_diduse_space(ra->newparent, myspace, |
789 | 1158 |
dd->dd_phys->dd_compressed_bytes, |
1159 |
dd->dd_phys->dd_uncompressed_bytes, tx); |
|
1160 |
} |
|
1161 |
||
1162 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
1163 |
||
1164 |
/* remove from old parent zapobj */ |
|
1165 |
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, |
|
1166 |
dd->dd_myname, tx); |
|
1167 |
ASSERT3U(err, ==, 0); |
|
1168 |
||
2199 | 1169 |
(void) strcpy(dd->dd_myname, ra->mynewname); |
789 | 1170 |
dsl_dir_close(dd->dd_parent, dd); |
2199 | 1171 |
dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; |
1544 | 1172 |
VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, |
2199 | 1173 |
ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); |
789 | 1174 |
|
1175 |
/* add to new parent zapobj */ |
|
2199 | 1176 |
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, |
789 | 1177 |
dd->dd_myname, 8, 1, &dd->dd_object, tx); |
1178 |
ASSERT3U(err, ==, 0); |
|
4543 | 1179 |
|
1180 |
spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, |
|
1181 |
tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); |
|
2199 | 1182 |
} |
789 | 1183 |
|
2199 | 1184 |
int |
1185 |
dsl_dir_rename(dsl_dir_t *dd, const char *newname) |
|
1186 |
{ |
|
1187 |
struct renamearg ra; |
|
1188 |
int err; |
|
1189 |
||
1190 |
/* new parent should exist */ |
|
1191 |
err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); |
|
1192 |
if (err) |
|
1193 |
return (err); |
|
1194 |
||
1195 |
/* can't rename to different pool */ |
|
1196 |
if (dd->dd_pool != ra.newparent->dd_pool) { |
|
1197 |
err = ENXIO; |
|
1198 |
goto out; |
|
1199 |
} |
|
1200 |
||
1201 |
/* new name should not already exist */ |
|
1202 |
if (ra.mynewname == NULL) { |
|
1203 |
err = EEXIST; |
|
1204 |
goto out; |
|
1205 |
} |
|
1206 |
||
1207 |
err = dsl_sync_task_do(dd->dd_pool, |
|
1208 |
dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); |
|
1209 |
||
1210 |
out: |
|
1211 |
dsl_dir_close(ra.newparent, FTAG); |
|
1212 |
return (err); |
|
789 | 1213 |
} |
2082 | 1214 |
|
1215 |
int |
|
1216 |
dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) |
|
1217 |
{ |
|
1218 |
dsl_dir_t *ancestor; |
|
1219 |
int64_t adelta; |
|
1220 |
uint64_t avail; |
|
1221 |
||
1222 |
ancestor = closest_common_ancestor(sdd, tdd); |
|
1223 |
adelta = would_change(sdd, -space, ancestor); |
|
1224 |
avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); |
|
1225 |
if (avail < space) |
|
1226 |
return (ENOSPC); |
|
1227 |
||
1228 |
return (0); |
|
1229 |
} |