author | ahrens |
Fri, 10 Mar 2006 16:27:46 -0800 | |
changeset 1596 | 2e2377ccbf85 |
parent 1544 | 938876158511 |
child 2082 | 76b439ec3ac1 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1356
e021b5e4aa0e
6377671 zfs mount -a shouldn't bother checking snapshots
eschrock
parents:
928
diff
changeset
|
22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/dmu.h> |
|
29 |
#include <sys/dmu_tx.h> |
|
30 |
#include <sys/dsl_dataset.h> |
|
31 |
#include <sys/dsl_dir.h> |
|
32 |
#include <sys/dsl_prop.h> |
|
33 |
#include <sys/spa.h> |
|
34 |
#include <sys/zap.h> |
|
35 |
#include <sys/zio.h> |
|
36 |
#include <sys/arc.h> |
|
37 |
#include "zfs_namecheck.h" |
|
38 |
||
39 |
static uint64_t dsl_dir_space_accounted(dsl_dir_t *dd); |
|
40 |
static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); |
|
41 |
static int dsl_dir_set_reservation_sync(dsl_dir_t *dd, |
|
42 |
void *arg, dmu_tx_t *tx); |
|
43 |
static uint64_t dsl_dir_space_available(dsl_dir_t *dd, |
|
44 |
dsl_dir_t *ancestor, int64_t delta, int ondiskonly); |
|
45 |
||
46 |
||
47 |
/* ARGSUSED */ |
|
48 |
static void |
|
49 |
dsl_dir_evict(dmu_buf_t *db, void *arg) |
|
50 |
{ |
|
51 |
dsl_dir_t *dd = arg; |
|
52 |
dsl_pool_t *dp = dd->dd_pool; |
|
53 |
int t; |
|
54 |
||
55 |
for (t = 0; t < TXG_SIZE; t++) { |
|
56 |
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); |
|
57 |
ASSERT(dd->dd_tempreserved[t] == 0); |
|
58 |
ASSERT(dd->dd_space_towrite[t] == 0); |
|
59 |
} |
|
60 |
||
61 |
ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); |
|
62 |
||
63 |
ASSERT(dd->dd_sync_txg == 0); |
|
64 |
||
65 |
if (dd->dd_parent) |
|
66 |
dsl_dir_close(dd->dd_parent, dd); |
|
67 |
||
68 |
spa_close(dd->dd_pool->dp_spa, dd); |
|
69 |
||
70 |
/* |
|
71 |
* The props callback list should be empty since they hold the |
|
72 |
* dir open. |
|
73 |
*/ |
|
74 |
list_destroy(&dd->dd_prop_cbs); |
|
75 |
kmem_free(dd, sizeof (dsl_dir_t)); |
|
76 |
} |
|
77 |
||
1544 | 78 |
int |
789 | 79 |
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, |
1544 | 80 |
const char *tail, void *tag, dsl_dir_t **ddp) |
789 | 81 |
{ |
82 |
dmu_buf_t *dbuf; |
|
83 |
dsl_dir_t *dd; |
|
1544 | 84 |
int err; |
789 | 85 |
|
86 |
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || |
|
87 |
dsl_pool_sync_context(dp)); |
|
88 |
||
1544 | 89 |
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); |
90 |
if (err) |
|
91 |
return (err); |
|
789 | 92 |
dd = dmu_buf_get_user(dbuf); |
93 |
#ifdef ZFS_DEBUG |
|
94 |
{ |
|
95 |
dmu_object_info_t doi; |
|
96 |
dmu_object_info_from_db(dbuf, &doi); |
|
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
97 |
ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); |
789 | 98 |
} |
99 |
#endif |
|
100 |
/* XXX assert bonus buffer size is correct */ |
|
101 |
if (dd == NULL) { |
|
102 |
dsl_dir_t *winner; |
|
103 |
int err; |
|
104 |
||
105 |
dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); |
|
106 |
dd->dd_object = ddobj; |
|
107 |
dd->dd_dbuf = dbuf; |
|
108 |
dd->dd_pool = dp; |
|
109 |
dd->dd_phys = dbuf->db_data; |
|
110 |
dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; |
|
111 |
||
112 |
list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), |
|
113 |
offsetof(dsl_prop_cb_record_t, cbr_node)); |
|
114 |
||
115 |
if (dd->dd_phys->dd_parent_obj) { |
|
1544 | 116 |
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, |
117 |
NULL, dd, &dd->dd_parent); |
|
118 |
if (err) { |
|
119 |
kmem_free(dd, sizeof (dsl_dir_t)); |
|
120 |
dmu_buf_rele(dbuf, tag); |
|
121 |
return (err); |
|
122 |
} |
|
789 | 123 |
if (tail) { |
124 |
#ifdef ZFS_DEBUG |
|
125 |
uint64_t foundobj; |
|
126 |
||
127 |
err = zap_lookup(dp->dp_meta_objset, |
|
128 |
dd->dd_parent->dd_phys-> |
|
129 |
dd_child_dir_zapobj, |
|
130 |
tail, sizeof (foundobj), 1, &foundobj); |
|
1544 | 131 |
ASSERT(err || foundobj == ddobj); |
789 | 132 |
#endif |
133 |
(void) strcpy(dd->dd_myname, tail); |
|
134 |
} else { |
|
135 |
err = zap_value_search(dp->dp_meta_objset, |
|
136 |
dd->dd_parent->dd_phys-> |
|
137 |
dd_child_dir_zapobj, |
|
138 |
ddobj, dd->dd_myname); |
|
1544 | 139 |
} |
140 |
if (err) { |
|
141 |
dsl_dir_close(dd->dd_parent, dd); |
|
142 |
kmem_free(dd, sizeof (dsl_dir_t)); |
|
143 |
dmu_buf_rele(dbuf, tag); |
|
144 |
return (err); |
|
789 | 145 |
} |
146 |
} else { |
|
147 |
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); |
|
148 |
} |
|
149 |
||
150 |
winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, |
|
151 |
dsl_dir_evict); |
|
152 |
if (winner) { |
|
153 |
if (dd->dd_parent) |
|
154 |
dsl_dir_close(dd->dd_parent, dd); |
|
155 |
kmem_free(dd, sizeof (dsl_dir_t)); |
|
156 |
dd = winner; |
|
157 |
} else { |
|
158 |
spa_open_ref(dp->dp_spa, dd); |
|
159 |
} |
|
160 |
} |
|
161 |
||
162 |
/* |
|
163 |
* The dsl_dir_t has both open-to-close and instantiate-to-evict |
|
164 |
* holds on the spa. We need the open-to-close holds because |
|
165 |
* otherwise the spa_refcnt wouldn't change when we open a |
|
166 |
* dir which the spa also has open, so we could incorrectly |
|
167 |
* think it was OK to unload/export/destroy the pool. We need |
|
168 |
* the instantiate-to-evict hold because the dsl_dir_t has a |
|
169 |
* pointer to the dd_pool, which has a pointer to the spa_t. |
|
170 |
*/ |
|
171 |
spa_open_ref(dp->dp_spa, tag); |
|
172 |
ASSERT3P(dd->dd_pool, ==, dp); |
|
173 |
ASSERT3U(dd->dd_object, ==, ddobj); |
|
174 |
ASSERT3P(dd->dd_dbuf, ==, dbuf); |
|
1544 | 175 |
*ddp = dd; |
176 |
return (0); |
|
789 | 177 |
} |
178 |
||
179 |
void |
|
180 |
dsl_dir_close(dsl_dir_t *dd, void *tag) |
|
181 |
{ |
|
182 |
dprintf_dd(dd, "%s\n", ""); |
|
183 |
spa_close(dd->dd_pool->dp_spa, tag); |
|
1544 | 184 |
dmu_buf_rele(dd->dd_dbuf, tag); |
789 | 185 |
} |
186 |
||
187 |
/* buf must be long enough (MAXNAMELEN should do) */ |
|
188 |
void |
|
189 |
dsl_dir_name(dsl_dir_t *dd, char *buf) |
|
190 |
{ |
|
191 |
if (dd->dd_parent) { |
|
192 |
dsl_dir_name(dd->dd_parent, buf); |
|
193 |
(void) strcat(buf, "/"); |
|
194 |
} else { |
|
195 |
buf[0] = '\0'; |
|
196 |
} |
|
197 |
if (!MUTEX_HELD(&dd->dd_lock)) { |
|
198 |
/* |
|
199 |
* recursive mutex so that we can use |
|
200 |
* dprintf_dd() with dd_lock held |
|
201 |
*/ |
|
202 |
mutex_enter(&dd->dd_lock); |
|
203 |
(void) strcat(buf, dd->dd_myname); |
|
204 |
mutex_exit(&dd->dd_lock); |
|
205 |
} else { |
|
206 |
(void) strcat(buf, dd->dd_myname); |
|
207 |
} |
|
208 |
} |
|
209 |
||
210 |
int |
|
211 |
dsl_dir_is_private(dsl_dir_t *dd) |
|
212 |
{ |
|
213 |
int rv = FALSE; |
|
214 |
||
215 |
if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) |
|
216 |
rv = TRUE; |
|
217 |
if (dataset_name_hidden(dd->dd_myname)) |
|
218 |
rv = TRUE; |
|
219 |
return (rv); |
|
220 |
} |
|
221 |
||
222 |
||
223 |
static int |
|
224 |
getcomponent(const char *path, char *component, const char **nextp) |
|
225 |
{ |
|
226 |
char *p; |
|
227 |
if (path == NULL) |
|
228 |
return (NULL); |
|
229 |
/* This would be a good place to reserve some namespace... */ |
|
230 |
p = strpbrk(path, "/@"); |
|
231 |
if (p && (p[1] == '/' || p[1] == '@')) { |
|
232 |
/* two separators in a row */ |
|
233 |
return (EINVAL); |
|
234 |
} |
|
235 |
if (p == NULL || p == path) { |
|
236 |
/* |
|
237 |
* if the first thing is an @ or /, it had better be an |
|
238 |
* @ and it had better not have any more ats or slashes, |
|
239 |
* and it had better have something after the @. |
|
240 |
*/ |
|
241 |
if (p != NULL && |
|
242 |
(p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) |
|
243 |
return (EINVAL); |
|
244 |
if (strlen(path) >= MAXNAMELEN) |
|
245 |
return (ENAMETOOLONG); |
|
246 |
(void) strcpy(component, path); |
|
247 |
p = NULL; |
|
248 |
} else if (p[0] == '/') { |
|
249 |
if (p-path >= MAXNAMELEN) |
|
250 |
return (ENAMETOOLONG); |
|
251 |
(void) strncpy(component, path, p - path); |
|
252 |
component[p-path] = '\0'; |
|
253 |
p++; |
|
254 |
} else if (p[0] == '@') { |
|
255 |
/* |
|
256 |
* if the next separator is an @, there better not be |
|
257 |
* any more slashes. |
|
258 |
*/ |
|
259 |
if (strchr(path, '/')) |
|
260 |
return (EINVAL); |
|
261 |
if (p-path >= MAXNAMELEN) |
|
262 |
return (ENAMETOOLONG); |
|
263 |
(void) strncpy(component, path, p - path); |
|
264 |
component[p-path] = '\0'; |
|
265 |
} else { |
|
266 |
ASSERT(!"invalid p"); |
|
267 |
} |
|
268 |
*nextp = p; |
|
269 |
return (0); |
|
270 |
} |
|
271 |
||
272 |
/* |
|
273 |
* same as dsl_open_dir, ignore the first component of name and use the |
|
274 |
* spa instead |
|
275 |
*/ |
|
1544 | 276 |
int |
277 |
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, |
|
278 |
dsl_dir_t **ddp, const char **tailp) |
|
789 | 279 |
{ |
280 |
char buf[MAXNAMELEN]; |
|
281 |
const char *next, *nextnext = NULL; |
|
282 |
int err; |
|
283 |
dsl_dir_t *dd; |
|
284 |
dsl_pool_t *dp; |
|
285 |
uint64_t ddobj; |
|
286 |
int openedspa = FALSE; |
|
287 |
||
288 |
dprintf("%s\n", name); |
|
289 |
||
290 |
if (name == NULL) |
|
1544 | 291 |
return (ENOENT); |
789 | 292 |
err = getcomponent(name, buf, &next); |
293 |
if (err) |
|
1544 | 294 |
return (err); |
789 | 295 |
if (spa == NULL) { |
296 |
err = spa_open(buf, &spa, FTAG); |
|
297 |
if (err) { |
|
298 |
dprintf("spa_open(%s) failed\n", buf); |
|
1544 | 299 |
return (err); |
789 | 300 |
} |
301 |
openedspa = TRUE; |
|
302 |
||
303 |
/* XXX this assertion belongs in spa_open */ |
|
304 |
ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); |
|
305 |
} |
|
306 |
||
307 |
dp = spa_get_dsl(spa); |
|
308 |
||
309 |
rw_enter(&dp->dp_config_rwlock, RW_READER); |
|
1544 | 310 |
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); |
311 |
if (err) { |
|
312 |
rw_exit(&dp->dp_config_rwlock); |
|
313 |
if (openedspa) |
|
314 |
spa_close(spa, FTAG); |
|
315 |
return (err); |
|
316 |
} |
|
317 |
||
789 | 318 |
while (next != NULL) { |
319 |
dsl_dir_t *child_ds; |
|
320 |
err = getcomponent(next, buf, &nextnext); |
|
1544 | 321 |
if (err) |
322 |
break; |
|
789 | 323 |
ASSERT(next[0] != '\0'); |
324 |
if (next[0] == '@') |
|
325 |
break; |
|
326 |
if (dd->dd_phys->dd_child_dir_zapobj == 0) |
|
327 |
break; |
|
328 |
dprintf("looking up %s in obj%lld\n", |
|
329 |
buf, dd->dd_phys->dd_child_dir_zapobj); |
|
330 |
||
331 |
err = zap_lookup(dp->dp_meta_objset, |
|
332 |
dd->dd_phys->dd_child_dir_zapobj, |
|
333 |
buf, sizeof (ddobj), 1, &ddobj); |
|
1544 | 334 |
if (err) { |
335 |
if (err == ENOENT) |
|
336 |
err = 0; |
|
789 | 337 |
break; |
338 |
} |
|
339 |
||
1544 | 340 |
err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); |
341 |
if (err) |
|
342 |
break; |
|
789 | 343 |
dsl_dir_close(dd, tag); |
344 |
dd = child_ds; |
|
345 |
next = nextnext; |
|
346 |
} |
|
347 |
rw_exit(&dp->dp_config_rwlock); |
|
348 |
||
1544 | 349 |
if (err) { |
350 |
dsl_dir_close(dd, tag); |
|
351 |
if (openedspa) |
|
352 |
spa_close(spa, FTAG); |
|
353 |
return (err); |
|
354 |
} |
|
355 |
||
789 | 356 |
/* |
357 |
* It's an error if there's more than one component left, or |
|
358 |
* tailp==NULL and there's any component left. |
|
359 |
*/ |
|
360 |
if (next != NULL && |
|
361 |
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) { |
|
362 |
/* bad path name */ |
|
363 |
dsl_dir_close(dd, tag); |
|
364 |
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); |
|
1544 | 365 |
err = ENOENT; |
789 | 366 |
} |
367 |
if (tailp) |
|
368 |
*tailp = next; |
|
369 |
if (openedspa) |
|
370 |
spa_close(spa, FTAG); |
|
1544 | 371 |
*ddp = dd; |
372 |
return (err); |
|
789 | 373 |
} |
374 |
||
375 |
/* |
|
376 |
* Return the dsl_dir_t, and possibly the last component which couldn't |
|
377 |
* be found in *tail. Return NULL if the path is bogus, or if |
|
378 |
* tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' |
|
379 |
* means that the last component is a snapshot. |
|
380 |
*/ |
|
1544 | 381 |
int |
382 |
dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) |
|
789 | 383 |
{ |
1544 | 384 |
return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); |
789 | 385 |
} |
386 |
||
387 |
int |
|
388 |
dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) |
|
389 |
{ |
|
390 |
objset_t *mos = pds->dd_pool->dp_meta_objset; |
|
391 |
uint64_t ddobj; |
|
392 |
dsl_dir_phys_t *dsphys; |
|
393 |
dmu_buf_t *dbuf; |
|
394 |
int err; |
|
395 |
||
396 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
397 |
||
398 |
if (pds->dd_phys->dd_child_dir_zapobj == 0) { |
|
399 |
dmu_buf_will_dirty(pds->dd_dbuf, tx); |
|
400 |
pds->dd_phys->dd_child_dir_zapobj = zap_create(mos, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
401 |
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); |
789 | 402 |
} |
403 |
||
404 |
rw_enter(&pds->dd_pool->dp_config_rwlock, RW_WRITER); |
|
405 |
err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, |
|
406 |
name, sizeof (uint64_t), 1, &ddobj); |
|
407 |
if (err != ENOENT) { |
|
408 |
rw_exit(&pds->dd_pool->dp_config_rwlock); |
|
409 |
return (err ? err : EEXIST); |
|
410 |
} |
|
411 |
||
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
412 |
ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
413 |
DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); |
789 | 414 |
err = zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, |
415 |
name, sizeof (uint64_t), 1, &ddobj, tx); |
|
416 |
ASSERT3U(err, ==, 0); |
|
417 |
dprintf("dataset_create: zap_add %s->%lld to %lld returned %d\n", |
|
418 |
name, ddobj, pds->dd_phys->dd_child_dir_zapobj, err); |
|
419 |
||
1544 | 420 |
VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); |
789 | 421 |
dmu_buf_will_dirty(dbuf, tx); |
422 |
dsphys = dbuf->db_data; |
|
423 |
||
424 |
dsphys->dd_creation_time = gethrestime_sec(); |
|
425 |
dsphys->dd_parent_obj = pds->dd_object; |
|
426 |
dsphys->dd_props_zapobj = zap_create(mos, |
|
427 |
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); |
|
428 |
dsphys->dd_child_dir_zapobj = zap_create(mos, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
429 |
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); |
1544 | 430 |
dmu_buf_rele(dbuf, FTAG); |
789 | 431 |
|
432 |
rw_exit(&pds->dd_pool->dp_config_rwlock); |
|
433 |
||
434 |
return (0); |
|
435 |
} |
|
436 |
||
437 |
int |
|
438 |
dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx) |
|
439 |
{ |
|
440 |
const char *name = arg; |
|
441 |
dsl_dir_t *dd = NULL; |
|
442 |
dsl_pool_t *dp = pds->dd_pool; |
|
443 |
objset_t *mos = dp->dp_meta_objset; |
|
444 |
uint64_t val, obj, child_zapobj, props_zapobj; |
|
445 |
int t, err; |
|
446 |
||
447 |
rw_enter(&dp->dp_config_rwlock, RW_WRITER); |
|
448 |
||
449 |
err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, name, |
|
450 |
8, 1, &obj); |
|
451 |
if (err) |
|
452 |
goto out; |
|
453 |
||
1544 | 454 |
err = dsl_dir_open_obj(dp, obj, name, FTAG, &dd); |
455 |
if (err) |
|
456 |
goto out; |
|
789 | 457 |
ASSERT3U(dd->dd_phys->dd_parent_obj, ==, pds->dd_object); |
458 |
||
459 |
if (dmu_buf_refcount(dd->dd_dbuf) > 1) { |
|
460 |
err = EBUSY; |
|
461 |
goto out; |
|
462 |
} |
|
463 |
||
464 |
for (t = 0; t < TXG_SIZE; t++) { |
|
465 |
/* |
|
466 |
* if they were dirty, they'd also be open. |
|
467 |
* dp_config_rwlock ensures that it stays that way. |
|
468 |
*/ |
|
469 |
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); |
|
470 |
} |
|
471 |
||
472 |
child_zapobj = dd->dd_phys->dd_child_dir_zapobj; |
|
473 |
props_zapobj = dd->dd_phys->dd_props_zapobj; |
|
474 |
||
475 |
if (child_zapobj != 0) { |
|
476 |
uint64_t count; |
|
477 |
err = EEXIST; |
|
478 |
(void) zap_count(mos, child_zapobj, &count); |
|
479 |
if (count != 0) |
|
480 |
goto out; |
|
481 |
} |
|
482 |
||
483 |
if (dd->dd_phys->dd_head_dataset_obj != 0) { |
|
484 |
err = dsl_dataset_destroy_sync(dd, NULL, tx); |
|
485 |
if (err) |
|
486 |
goto out; |
|
487 |
} |
|
488 |
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); |
|
489 |
||
490 |
/* The point of no (unsuccessful) return */ |
|
491 |
||
492 |
/* Make sure parent's used gets updated */ |
|
493 |
val = 0; |
|
494 |
err = dsl_dir_set_reservation_sync(dd, &val, tx); |
|
495 |
ASSERT(err == 0); |
|
496 |
ASSERT3U(dd->dd_used_bytes, ==, 0); |
|
497 |
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); |
|
498 |
dsl_dir_close(dd, FTAG); |
|
499 |
dd = NULL; |
|
500 |
||
501 |
err = dmu_object_free(mos, obj, tx); |
|
502 |
ASSERT(err == 0); |
|
503 |
||
504 |
if (child_zapobj) |
|
505 |
err = zap_destroy(mos, child_zapobj, tx); |
|
506 |
ASSERT(err == 0); |
|
507 |
||
508 |
if (props_zapobj) |
|
509 |
err = zap_destroy(mos, props_zapobj, tx); |
|
510 |
ASSERT(err == 0); |
|
511 |
||
512 |
err = zap_remove(mos, pds->dd_phys->dd_child_dir_zapobj, name, tx); |
|
513 |
ASSERT(err == 0); |
|
514 |
||
515 |
out: |
|
516 |
rw_exit(&dp->dp_config_rwlock); |
|
517 |
if (dd) |
|
518 |
dsl_dir_close(dd, FTAG); |
|
519 |
||
520 |
return (err); |
|
521 |
} |
|
522 |
||
523 |
void |
|
524 |
dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) |
|
525 |
{ |
|
526 |
dsl_dir_phys_t *dsp; |
|
527 |
dmu_buf_t *dbuf; |
|
528 |
int error; |
|
529 |
||
928
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
530 |
*ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, |
36d72fe4da29
6349314 dmu_object_type names incorrect for DSL Directories and DSL Datasets
tabriz
parents:
885
diff
changeset
|
531 |
DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); |
789 | 532 |
|
533 |
error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, |
|
534 |
sizeof (uint64_t), 1, ddobjp, tx); |
|
535 |
ASSERT3U(error, ==, 0); |
|
536 |
||
1544 | 537 |
VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf)); |
789 | 538 |
dmu_buf_will_dirty(dbuf, tx); |
539 |
dsp = dbuf->db_data; |
|
540 |
||
541 |
dsp->dd_creation_time = gethrestime_sec(); |
|
542 |
dsp->dd_props_zapobj = zap_create(mos, |
|
543 |
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); |
|
544 |
dsp->dd_child_dir_zapobj = zap_create(mos, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
545 |
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); |
789 | 546 |
|
1544 | 547 |
dmu_buf_rele(dbuf, FTAG); |
789 | 548 |
} |
549 |
||
550 |
void |
|
551 |
dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) |
|
552 |
{ |
|
553 |
bzero(dds, sizeof (dmu_objset_stats_t)); |
|
554 |
||
555 |
dds->dds_available = dsl_dir_space_available(dd, NULL, 0, TRUE); |
|
556 |
||
557 |
mutex_enter(&dd->dd_lock); |
|
558 |
dds->dds_space_used = dd->dd_used_bytes; |
|
559 |
dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes; |
|
560 |
dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes; |
|
561 |
dds->dds_quota = dd->dd_phys->dd_quota; |
|
562 |
dds->dds_reserved = dd->dd_phys->dd_reserved; |
|
563 |
mutex_exit(&dd->dd_lock); |
|
564 |
||
565 |
dds->dds_creation_time = dd->dd_phys->dd_creation_time; |
|
566 |
||
567 |
if (dd->dd_phys->dd_clone_parent_obj) { |
|
568 |
dsl_dataset_t *ds; |
|
569 |
||
570 |
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); |
|
1544 | 571 |
VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, |
572 |
dd->dd_phys->dd_clone_parent_obj, |
|
573 |
NULL, DS_MODE_NONE, FTAG, &ds)); |
|
789 | 574 |
dsl_dataset_name(ds, dds->dds_clone_of); |
575 |
dsl_dataset_close(ds, DS_MODE_NONE, FTAG); |
|
576 |
rw_exit(&dd->dd_pool->dp_config_rwlock); |
|
577 |
} |
|
578 |
} |
|
579 |
||
580 |
int |
|
581 |
dsl_dir_sync_task(dsl_dir_t *dd, |
|
582 |
int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space) |
|
583 |
{ |
|
584 |
dmu_tx_t *tx; |
|
585 |
dsl_pool_t *dp = dd->dd_pool; |
|
586 |
int err = 0; |
|
587 |
uint64_t txg; |
|
588 |
||
589 |
dprintf_dd(dd, "func=%p space=%llu\n", func, space); |
|
590 |
||
591 |
again: |
|
592 |
tx = dmu_tx_create_ds(dd); |
|
593 |
dmu_tx_hold_space(tx, space); |
|
594 |
err = dmu_tx_assign(tx, TXG_WAIT); |
|
595 |
if (err == ENOSPC || err == EDQUOT) { |
|
596 |
dsl_dir_t *rds; |
|
597 |
/* |
|
598 |
* They can get their space from either this dd, or the |
|
599 |
* root dd. |
|
600 |
*/ |
|
601 |
for (rds = dd; rds->dd_parent; rds = rds->dd_parent) |
|
602 |
continue; |
|
603 |
dmu_tx_abort(tx); |
|
604 |
tx = dmu_tx_create_ds(rds); |
|
605 |
dmu_tx_hold_space(tx, space); |
|
606 |
err = dmu_tx_assign(tx, TXG_WAIT); |
|
607 |
} |
|
608 |
if (err) { |
|
609 |
dmu_tx_abort(tx); |
|
610 |
return (err); |
|
611 |
} |
|
612 |
||
613 |
txg = dmu_tx_get_txg(tx); |
|
614 |
mutex_enter(&dd->dd_lock); |
|
615 |
if (dd->dd_sync_txg != 0) { |
|
616 |
mutex_exit(&dd->dd_lock); |
|
617 |
dmu_tx_commit(tx); |
|
618 |
txg_wait_synced(dp, 0); |
|
619 |
goto again; |
|
620 |
} |
|
621 |
||
622 |
/* We're good to go */ |
|
623 |
||
624 |
dd->dd_sync_txg = txg; |
|
625 |
dd->dd_sync_func = func; |
|
626 |
dd->dd_sync_arg = arg; |
|
627 |
||
628 |
mutex_exit(&dd->dd_lock); |
|
629 |
||
630 |
dsl_dir_dirty(dd, tx); |
|
631 |
dmu_tx_commit(tx); |
|
632 |
||
633 |
txg_wait_synced(dp, txg); |
|
634 |
||
635 |
mutex_enter(&dd->dd_lock); |
|
636 |
ASSERT(dd->dd_sync_txg == txg); |
|
637 |
ASSERT(dd->dd_sync_func == NULL); |
|
638 |
err = dd->dd_sync_err; |
|
639 |
dd->dd_sync_txg = 0; |
|
640 |
mutex_exit(&dd->dd_lock); |
|
641 |
||
642 |
return (err); |
|
643 |
} |
|
644 |
||
645 |
void |
|
646 |
dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) |
|
647 |
{ |
|
648 |
dsl_pool_t *dp = dd->dd_pool; |
|
649 |
||
650 |
ASSERT(dd->dd_phys); |
|
651 |
||
652 |
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { |
|
653 |
/* up the hold count until we can be written out */ |
|
654 |
dmu_buf_add_ref(dd->dd_dbuf, dd); |
|
655 |
} |
|
656 |
} |
|
657 |
||
658 |
static int64_t |
|
659 |
parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) |
|
660 |
{ |
|
661 |
uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); |
|
662 |
uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); |
|
663 |
return (new_accounted - old_accounted); |
|
664 |
} |
|
665 |
||
666 |
void |
|
667 |
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) |
|
668 |
{ |
|
669 |
if (dd->dd_sync_txg == tx->tx_txg && dd->dd_sync_func) { |
|
670 |
dd->dd_sync_err = dd->dd_sync_func(dd, dd->dd_sync_arg, tx); |
|
671 |
dd->dd_sync_func = NULL; |
|
672 |
} |
|
673 |
||
674 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
675 |
||
676 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
677 |
||
678 |
mutex_enter(&dd->dd_lock); |
|
679 |
ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); |
|
680 |
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, |
|
681 |
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); |
|
682 |
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; |
|
683 |
dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; |
|
684 |
mutex_exit(&dd->dd_lock); |
|
685 |
||
686 |
/* release the hold from dsl_dir_dirty */ |
|
1544 | 687 |
dmu_buf_rele(dd->dd_dbuf, dd); |
789 | 688 |
} |
689 |
||
690 |
static uint64_t |
|
691 |
dsl_dir_estimated_space(dsl_dir_t *dd) |
|
692 |
{ |
|
693 |
int64_t space; |
|
694 |
int i; |
|
695 |
||
696 |
ASSERT(MUTEX_HELD(&dd->dd_lock)); |
|
697 |
||
1544 | 698 |
space = dd->dd_phys->dd_used_bytes; |
789 | 699 |
ASSERT(space >= 0); |
700 |
for (i = 0; i < TXG_SIZE; i++) { |
|
701 |
space += dd->dd_space_towrite[i&TXG_MASK]; |
|
702 |
ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); |
|
703 |
} |
|
704 |
return (space); |
|
705 |
} |
|
706 |
||
707 |
/* |
|
708 |
* How much space would dd have available if ancestor had delta applied |
|
709 |
* to it? If ondiskonly is set, we're only interested in what's |
|
710 |
* on-disk, not estimated pending changes. |
|
711 |
*/ |
|
712 |
static uint64_t |
|
713 |
dsl_dir_space_available(dsl_dir_t *dd, |
|
714 |
dsl_dir_t *ancestor, int64_t delta, int ondiskonly) |
|
715 |
{ |
|
716 |
uint64_t parentspace, myspace, quota, used; |
|
717 |
||
718 |
/* |
|
719 |
* If there are no restrictions otherwise, assume we have |
|
720 |
* unlimited space available. |
|
721 |
*/ |
|
722 |
quota = UINT64_MAX; |
|
723 |
parentspace = UINT64_MAX; |
|
724 |
||
725 |
if (dd->dd_parent != NULL) { |
|
726 |
parentspace = dsl_dir_space_available(dd->dd_parent, |
|
727 |
ancestor, delta, ondiskonly); |
|
728 |
} |
|
729 |
||
730 |
mutex_enter(&dd->dd_lock); |
|
731 |
if (dd->dd_phys->dd_quota != 0) |
|
732 |
quota = dd->dd_phys->dd_quota; |
|
733 |
if (ondiskonly) { |
|
734 |
used = dd->dd_used_bytes; |
|
735 |
} else { |
|
736 |
used = dsl_dir_estimated_space(dd); |
|
737 |
} |
|
738 |
if (dd == ancestor) |
|
739 |
used += delta; |
|
740 |
||
741 |
if (dd->dd_parent == NULL) { |
|
742 |
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE); |
|
743 |
quota = MIN(quota, poolsize); |
|
744 |
} |
|
745 |
||
746 |
if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { |
|
747 |
/* |
|
748 |
* We have some space reserved, in addition to what our |
|
749 |
* parent gave us. |
|
750 |
*/ |
|
751 |
parentspace += dd->dd_phys->dd_reserved - used; |
|
752 |
} |
|
753 |
||
754 |
if (used > quota) { |
|
755 |
/* over quota */ |
|
756 |
myspace = 0; |
|
757 |
#ifdef ZFS_DEBUG |
|
758 |
{ |
|
759 |
/* |
|
760 |
* While it's OK to be a little over quota, if |
|
761 |
* we think we are using more space than there |
|
762 |
* is in the pool (which is already 6% more than |
|
763 |
* dsl_pool_adjustedsize()), something is very |
|
764 |
* wrong. |
|
765 |
*/ |
|
766 |
uint64_t space = spa_get_space(dd->dd_pool->dp_spa); |
|
767 |
ASSERT3U(used, <=, space); |
|
768 |
} |
|
769 |
#endif |
|
770 |
} else { |
|
771 |
/* |
|
772 |
* the lesser of parent's space and the space |
|
773 |
* left in our quota |
|
774 |
*/ |
|
775 |
myspace = MIN(parentspace, quota - used); |
|
776 |
} |
|
777 |
||
778 |
mutex_exit(&dd->dd_lock); |
|
779 |
||
780 |
return (myspace); |
|
781 |
} |
|
782 |
||
783 |
struct tempreserve { |
|
784 |
list_node_t tr_node; |
|
785 |
dsl_dir_t *tr_ds; |
|
786 |
uint64_t tr_size; |
|
787 |
}; |
|
788 |
||
789 |
/* |
|
790 |
* Reserve space in this dsl_dir, to be used in this tx's txg. |
|
791 |
* After the space has been dirtied (and thus |
|
792 |
* dsl_dir_willuse_space() has been called), the reservation should |
|
793 |
* be canceled, using dsl_dir_tempreserve_clear(). |
|
794 |
*/ |
|
795 |
static int |
|
796 |
dsl_dir_tempreserve_impl(dsl_dir_t *dd, |
|
797 |
uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) |
|
798 |
{ |
|
799 |
uint64_t txg = tx->tx_txg; |
|
800 |
uint64_t est_used, quota, parent_rsrv; |
|
801 |
int edquot = EDQUOT; |
|
802 |
int txgidx = txg & TXG_MASK; |
|
803 |
int i; |
|
804 |
struct tempreserve *tr; |
|
805 |
||
806 |
ASSERT3U(txg, !=, 0); |
|
1544 | 807 |
ASSERT3S(asize, >=, 0); |
789 | 808 |
|
809 |
mutex_enter(&dd->dd_lock); |
|
810 |
/* |
|
811 |
* Check against the dsl_dir's quota. We don't add in the delta |
|
812 |
* when checking for over-quota because they get one free hit. |
|
813 |
*/ |
|
814 |
est_used = dsl_dir_estimated_space(dd); |
|
815 |
for (i = 0; i < TXG_SIZE; i++) |
|
816 |
est_used += dd->dd_tempreserved[i]; |
|
817 |
||
818 |
quota = UINT64_MAX; |
|
819 |
||
820 |
if (dd->dd_phys->dd_quota) |
|
821 |
quota = dd->dd_phys->dd_quota; |
|
822 |
||
823 |
/* |
|
824 |
* If this transaction will result in a net free of space, we want |
|
825 |
* to let it through, but we have to be careful: the space that it |
|
826 |
* frees won't become available until *after* this txg syncs. |
|
827 |
* Therefore, to ensure that it's possible to remove files from |
|
828 |
* a full pool without inducing transient overcommits, we throttle |
|
829 |
* netfree transactions against a quota that is slightly larger, |
|
830 |
* but still within the pool's allocation slop. In cases where |
|
831 |
* we're very close to full, this will allow a steady trickle of |
|
832 |
* removes to get through. |
|
833 |
*/ |
|
834 |
if (dd->dd_parent == NULL) { |
|
835 |
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); |
|
836 |
if (poolsize < quota) { |
|
837 |
quota = poolsize; |
|
838 |
edquot = ENOSPC; |
|
839 |
} |
|
840 |
} else if (netfree) { |
|
841 |
quota = UINT64_MAX; |
|
842 |
} |
|
843 |
||
844 |
/* |
|
845 |
* If they are requesting more space, and our current estimate |
|
846 |
* is over quota. They get to try again unless the actual |
|
1544 | 847 |
* on-disk is over quota and there are no pending changes (which |
848 |
* may free up space for us). |
|
789 | 849 |
*/ |
850 |
if (asize > 0 && est_used > quota) { |
|
1544 | 851 |
if (dd->dd_space_towrite[txg & TXG_MASK] != 0 || |
852 |
dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 || |
|
853 |
dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 || |
|
854 |
dd->dd_used_bytes < quota) |
|
789 | 855 |
edquot = ERESTART; |
856 |
dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " |
|
857 |
"quota=%lluK tr=%lluK err=%d\n", |
|
858 |
dd->dd_used_bytes>>10, est_used>>10, |
|
859 |
quota>>10, asize>>10, edquot); |
|
860 |
mutex_exit(&dd->dd_lock); |
|
861 |
return (edquot); |
|
862 |
} |
|
863 |
||
864 |
/* We need to up our estimated delta before dropping dd_lock */ |
|
865 |
dd->dd_tempreserved[txgidx] += asize; |
|
866 |
||
867 |
parent_rsrv = parent_delta(dd, est_used, asize); |
|
868 |
mutex_exit(&dd->dd_lock); |
|
869 |
||
870 |
tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); |
|
871 |
tr->tr_ds = dd; |
|
872 |
tr->tr_size = asize; |
|
873 |
list_insert_tail(tr_list, tr); |
|
874 |
||
875 |
/* see if it's OK with our parent */ |
|
876 |
if (dd->dd_parent && parent_rsrv) { |
|
877 |
return (dsl_dir_tempreserve_impl(dd->dd_parent, |
|
878 |
parent_rsrv, netfree, tr_list, tx)); |
|
879 |
} else { |
|
880 |
return (0); |
|
881 |
} |
|
882 |
} |
|
883 |
||
884 |
/* |
|
885 |
* Reserve space in this dsl_dir, to be used in this tx's txg. |
|
886 |
* After the space has been dirtied (and thus |
|
887 |
* dsl_dir_willuse_space() has been called), the reservation should |
|
888 |
* be canceled, using dsl_dir_tempreserve_clear(). |
|
889 |
*/ |
|
890 |
int |
|
891 |
dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, |
|
892 |
uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) |
|
893 |
{ |
|
894 |
int err = 0; |
|
895 |
list_t *tr_list; |
|
896 |
||
897 |
tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); |
|
898 |
list_create(tr_list, sizeof (struct tempreserve), |
|
899 |
offsetof(struct tempreserve, tr_node)); |
|
1544 | 900 |
ASSERT3S(asize, >=, 0); |
901 |
ASSERT3S(fsize, >=, 0); |
|
789 | 902 |
|
903 |
err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, |
|
904 |
tr_list, tx); |
|
905 |
||
906 |
if (err == 0) { |
|
907 |
struct tempreserve *tr; |
|
908 |
||
909 |
err = arc_tempreserve_space(lsize); |
|
910 |
if (err == 0) { |
|
911 |
tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); |
|
912 |
tr->tr_ds = NULL; |
|
913 |
tr->tr_size = lsize; |
|
914 |
list_insert_tail(tr_list, tr); |
|
915 |
} |
|
916 |
} |
|
917 |
||
918 |
if (err) |
|
919 |
dsl_dir_tempreserve_clear(tr_list, tx); |
|
920 |
else |
|
921 |
*tr_cookiep = tr_list; |
|
922 |
return (err); |
|
923 |
} |
|
924 |
||
925 |
/* |
|
926 |
* Clear a temporary reservation that we previously made with |
|
927 |
* dsl_dir_tempreserve_space(). |
|
928 |
*/ |
|
929 |
void |
|
930 |
dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) |
|
931 |
{ |
|
932 |
int txgidx = tx->tx_txg & TXG_MASK; |
|
933 |
list_t *tr_list = tr_cookie; |
|
934 |
struct tempreserve *tr; |
|
935 |
||
936 |
ASSERT3U(tx->tx_txg, !=, 0); |
|
937 |
||
938 |
while (tr = list_head(tr_list)) { |
|
939 |
if (tr->tr_ds == NULL) { |
|
940 |
arc_tempreserve_clear(tr->tr_size); |
|
941 |
} else { |
|
942 |
mutex_enter(&tr->tr_ds->dd_lock); |
|
943 |
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, |
|
944 |
tr->tr_size); |
|
945 |
tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; |
|
946 |
mutex_exit(&tr->tr_ds->dd_lock); |
|
947 |
} |
|
948 |
list_remove(tr_list, tr); |
|
949 |
kmem_free(tr, sizeof (struct tempreserve)); |
|
950 |
} |
|
951 |
||
952 |
kmem_free(tr_list, sizeof (list_t)); |
|
953 |
} |
|
954 |
||
955 |
/* |
|
956 |
* Call in open context when we think we're going to write/free space, |
|
957 |
* eg. when dirtying data. Be conservative (ie. OK to write less than |
|
958 |
* this or free more than this, but don't write more or free less). |
|
959 |
*/ |
|
960 |
void |
|
961 |
dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) |
|
962 |
{ |
|
963 |
int64_t parent_space; |
|
964 |
uint64_t est_used; |
|
965 |
||
966 |
mutex_enter(&dd->dd_lock); |
|
967 |
if (space > 0) |
|
968 |
dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; |
|
969 |
||
970 |
est_used = dsl_dir_estimated_space(dd); |
|
971 |
parent_space = parent_delta(dd, est_used, space); |
|
972 |
mutex_exit(&dd->dd_lock); |
|
973 |
||
974 |
/* Make sure that we clean up dd_space_to* */ |
|
975 |
dsl_dir_dirty(dd, tx); |
|
976 |
||
977 |
/* XXX this is potentially expensive and unnecessary... */ |
|
978 |
if (parent_space && dd->dd_parent) |
|
979 |
dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); |
|
980 |
} |
|
981 |
||
982 |
/* call from syncing context when we actually write/free space for this dd */ |
|
983 |
void |
|
984 |
dsl_dir_diduse_space(dsl_dir_t *dd, |
|
985 |
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) |
|
986 |
{ |
|
987 |
int64_t accounted_delta; |
|
988 |
||
989 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
990 |
||
991 |
dsl_dir_dirty(dd, tx); |
|
992 |
||
993 |
mutex_enter(&dd->dd_lock); |
|
994 |
accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); |
|
995 |
ASSERT(used >= 0 || dd->dd_used_bytes >= -used); |
|
996 |
ASSERT(compressed >= 0 || |
|
997 |
dd->dd_phys->dd_compressed_bytes >= -compressed); |
|
998 |
ASSERT(uncompressed >= 0 || |
|
999 |
dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); |
|
1000 |
dd->dd_used_bytes += used; |
|
1001 |
dd->dd_phys->dd_uncompressed_bytes += uncompressed; |
|
1002 |
dd->dd_phys->dd_compressed_bytes += compressed; |
|
1003 |
mutex_exit(&dd->dd_lock); |
|
1004 |
||
1005 |
if (dd->dd_parent != NULL) { |
|
1006 |
dsl_dir_diduse_space(dd->dd_parent, |
|
1007 |
accounted_delta, compressed, uncompressed, tx); |
|
1008 |
} |
|
1009 |
} |
|
1010 |
||
1011 |
static int |
|
1012 |
dsl_dir_set_quota_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) |
|
1013 |
{ |
|
1014 |
uint64_t *quotap = arg; |
|
1015 |
uint64_t new_quota = *quotap; |
|
1016 |
int err = 0; |
|
1017 |
||
1018 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
1019 |
||
1020 |
mutex_enter(&dd->dd_lock); |
|
1021 |
if (new_quota != 0 && (new_quota < dd->dd_phys->dd_reserved || |
|
1022 |
new_quota < dsl_dir_estimated_space(dd))) { |
|
1023 |
err = ENOSPC; |
|
1024 |
} else { |
|
1025 |
dd->dd_phys->dd_quota = new_quota; |
|
1026 |
} |
|
1027 |
mutex_exit(&dd->dd_lock); |
|
1028 |
return (err); |
|
1029 |
} |
|
1030 |
||
1031 |
int |
|
1032 |
dsl_dir_set_quota(const char *ddname, uint64_t quota) |
|
1033 |
{ |
|
1034 |
dsl_dir_t *dd; |
|
1035 |
int err; |
|
1036 |
||
1544 | 1037 |
err = dsl_dir_open(ddname, FTAG, &dd, NULL); |
1038 |
if (err) |
|
1039 |
return (err); |
|
789 | 1040 |
/* |
1041 |
* If someone removes a file, then tries to set the quota, we |
|
1042 |
* want to make sure the file freeing takes effect. |
|
1043 |
*/ |
|
1044 |
txg_wait_open(dd->dd_pool, 0); |
|
1045 |
||
1046 |
err = dsl_dir_sync_task(dd, dsl_dir_set_quota_sync, "a, 0); |
|
1047 |
dsl_dir_close(dd, FTAG); |
|
1048 |
return (err); |
|
1049 |
} |
|
1050 |
||
1051 |
static int |
|
1052 |
dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) |
|
1053 |
{ |
|
1054 |
uint64_t *reservationp = arg; |
|
1055 |
uint64_t new_reservation = *reservationp; |
|
1056 |
uint64_t used, avail; |
|
1057 |
int64_t delta; |
|
1058 |
||
1059 |
if (new_reservation > INT64_MAX) |
|
1060 |
return (EOVERFLOW); |
|
1061 |
||
1062 |
mutex_enter(&dd->dd_lock); |
|
1063 |
used = dd->dd_used_bytes; |
|
1064 |
delta = MAX(used, new_reservation) - |
|
1065 |
MAX(used, dd->dd_phys->dd_reserved); |
|
1066 |
mutex_exit(&dd->dd_lock); |
|
1067 |
||
1068 |
if (dd->dd_parent) { |
|
1069 |
avail = dsl_dir_space_available(dd->dd_parent, |
|
1070 |
NULL, 0, FALSE); |
|
1071 |
} else { |
|
1072 |
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; |
|
1073 |
} |
|
1074 |
||
1075 |
if (delta > 0 && delta > avail) |
|
1076 |
return (ENOSPC); |
|
1077 |
if (delta > 0 && dd->dd_phys->dd_quota > 0 && |
|
1078 |
new_reservation > dd->dd_phys->dd_quota) |
|
1079 |
return (ENOSPC); |
|
1080 |
||
1081 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
1082 |
dd->dd_phys->dd_reserved = new_reservation; |
|
1083 |
||
1084 |
if (dd->dd_parent != NULL) { |
|
1085 |
/* Roll up this additional usage into our ancestors */ |
|
1086 |
dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); |
|
1087 |
} |
|
1088 |
return (0); |
|
1089 |
} |
|
1090 |
||
1091 |
int |
|
1092 |
dsl_dir_set_reservation(const char *ddname, uint64_t reservation) |
|
1093 |
{ |
|
1094 |
dsl_dir_t *dd; |
|
1095 |
int err; |
|
1096 |
||
1544 | 1097 |
err = dsl_dir_open(ddname, FTAG, &dd, NULL); |
1098 |
if (err) |
|
1099 |
return (err); |
|
789 | 1100 |
err = dsl_dir_sync_task(dd, |
1101 |
dsl_dir_set_reservation_sync, &reservation, 0); |
|
1102 |
dsl_dir_close(dd, FTAG); |
|
1103 |
return (err); |
|
1104 |
} |
|
1105 |
||
1106 |
/*
 * Walk up from ds1 and, for each directory on that path (ds1 itself
 * included), search ds2's chain of parents (ds2 itself included) for
 * the same directory.  The first match is returned; NULL is returned if
 * the two chains never meet.
 */
static dsl_dir_t *
closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
{
	dsl_dir_t *candidate = ds1;

	while (candidate != NULL) {
		dsl_dir_t *probe = ds2;

		while (probe != NULL && probe != candidate)
			probe = probe->dd_parent;
		if (probe != NULL)
			return (probe);

		candidate = candidate->dd_parent;
	}
	return (NULL);
}
|
1118 |
||
1119 |
/* |
|
1120 |
* If delta is applied to dd, how much of that delta would be applied to |
|
1121 |
* ancestor? Syncing context only. |
|
1122 |
*/ |
|
1123 |
static int64_t |
|
1124 |
would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) |
|
1125 |
{ |
|
1126 |
if (dd == ancestor) |
|
1127 |
return (delta); |
|
1128 |
||
1129 |
mutex_enter(&dd->dd_lock); |
|
1130 |
delta = parent_delta(dd, dd->dd_used_bytes, delta); |
|
1131 |
mutex_exit(&dd->dd_lock); |
|
1132 |
return (would_change(dd->dd_parent, delta, ancestor)); |
|
1133 |
} |
|
1134 |
||
1135 |
int |
|
1136 |
dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) |
|
1137 |
{ |
|
1138 |
const char *newname = arg; |
|
1139 |
dsl_pool_t *dp = dd->dd_pool; |
|
1140 |
objset_t *mos = dp->dp_meta_objset; |
|
1141 |
dsl_dir_t *newpds; |
|
1142 |
const char *tail; |
|
1143 |
int err, len; |
|
1144 |
||
1145 |
/* can't rename to different pool */ |
|
1146 |
len = strlen(dp->dp_root_dir->dd_myname); |
|
1147 |
if (strncmp(dp->dp_root_dir->dd_myname, newname, len != 0) || |
|
1148 |
newname[len] != '/') { |
|
1149 |
return (ENXIO); |
|
1150 |
} |
|
1151 |
||
1152 |
/* new parent should exist */ |
|
1544 | 1153 |
err = dsl_dir_open_spa(dp->dp_spa, newname, FTAG, &newpds, &tail); |
1154 |
if (err) |
|
1155 |
return (err); |
|
789 | 1156 |
|
1157 |
/* new name should not already exist */ |
|
1158 |
if (tail == NULL) { |
|
1159 |
dsl_dir_close(newpds, FTAG); |
|
1160 |
return (EEXIST); |
|
1161 |
} |
|
1162 |
||
1163 |
rw_enter(&dp->dp_config_rwlock, RW_WRITER); |
|
1164 |
||
1165 |
/* There should be 2 references: the open and the dirty */ |
|
1166 |
if (dmu_buf_refcount(dd->dd_dbuf) > 2) { |
|
1167 |
rw_exit(&dp->dp_config_rwlock); |
|
1168 |
dsl_dir_close(newpds, FTAG); |
|
1169 |
return (EBUSY); |
|
1170 |
} |
|
1171 |
||
1172 |
if (newpds != dd->dd_parent) { |
|
1173 |
dsl_dir_t *ancestor; |
|
1174 |
int64_t adelta; |
|
1175 |
uint64_t myspace, avail; |
|
1176 |
||
1177 |
ancestor = closest_common_ancestor(dd, newpds); |
|
1178 |
||
1179 |
/* no rename into our descendent */ |
|
1180 |
if (ancestor == dd) { |
|
1181 |
dsl_dir_close(newpds, FTAG); |
|
1182 |
rw_exit(&dp->dp_config_rwlock); |
|
1183 |
return (EINVAL); |
|
1184 |
} |
|
1185 |
||
1186 |
myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); |
|
1187 |
adelta = would_change(dd->dd_parent, -myspace, ancestor); |
|
1188 |
avail = dsl_dir_space_available(newpds, |
|
1189 |
ancestor, adelta, FALSE); |
|
1190 |
if (avail < myspace) { |
|
1191 |
dsl_dir_close(newpds, FTAG); |
|
1192 |
rw_exit(&dp->dp_config_rwlock); |
|
1193 |
return (ENOSPC); |
|
1194 |
} |
|
1195 |
||
1196 |
/* The point of no (unsuccessful) return */ |
|
1197 |
||
1198 |
dsl_dir_diduse_space(dd->dd_parent, -myspace, |
|
1199 |
-dd->dd_phys->dd_compressed_bytes, |
|
1200 |
-dd->dd_phys->dd_uncompressed_bytes, tx); |
|
1201 |
dsl_dir_diduse_space(newpds, myspace, |
|
1202 |
dd->dd_phys->dd_compressed_bytes, |
|
1203 |
dd->dd_phys->dd_uncompressed_bytes, tx); |
|
1204 |
} |
|
1205 |
||
1206 |
/* The point of no (unsuccessful) return */ |
|
1207 |
||
1208 |
dmu_buf_will_dirty(dd->dd_dbuf, tx); |
|
1209 |
||
1210 |
/* remove from old parent zapobj */ |
|
1211 |
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, |
|
1212 |
dd->dd_myname, tx); |
|
1213 |
ASSERT3U(err, ==, 0); |
|
1214 |
||
1215 |
(void) strcpy(dd->dd_myname, tail); |
|
1216 |
dsl_dir_close(dd->dd_parent, dd); |
|
1217 |
dd->dd_phys->dd_parent_obj = newpds->dd_object; |
|
1544 | 1218 |
VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, |
1219 |
newpds->dd_object, NULL, dd, &dd->dd_parent)); |
|
789 | 1220 |
|
1221 |
/* add to new parent zapobj */ |
|
1222 |
err = zap_add(mos, newpds->dd_phys->dd_child_dir_zapobj, |
|
1223 |
dd->dd_myname, 8, 1, &dd->dd_object, tx); |
|
1224 |
ASSERT3U(err, ==, 0); |
|
1225 |
||
1226 |
dsl_dir_close(newpds, FTAG); |
|
1227 |
rw_exit(&dp->dp_config_rwlock); |
|
1228 |
return (0); |
|
1229 |
} |