author | eschrock |
Mon, 30 Jan 2006 21:34:28 -0800 | |
changeset 1354 | 81359ee1ee63 |
parent 982 | 9bc5c1db9740 |
child 1544 | 938876158511 |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License, Version 1.0 only |
|
6 |
* (the "License"). You may not use this file except in compliance |
|
7 |
* with the License. |
|
8 |
* |
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 |
* or http://www.opensolaris.org/os/licensing. |
|
11 |
* See the License for the specific language governing permissions |
|
12 |
* and limitations under the License. |
|
13 |
* |
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 |
* If applicable, add the following below this CDDL HEADER, with the |
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 |
* |
|
20 |
* CDDL HEADER END |
|
21 |
*/ |
|
22 |
/* |
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
24 |
* Use is subject to license terms. |
|
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
#include <sys/zfs_context.h> |
|
30 |
#include <sys/dmu_objset.h> |
|
31 |
#include <sys/dsl_dir.h> |
|
32 |
#include <sys/dsl_dataset.h> |
|
33 |
#include <sys/dsl_prop.h> |
|
34 |
#include <sys/dsl_pool.h> |
|
35 |
#include <sys/dnode.h> |
|
36 |
#include <sys/dbuf.h> |
|
37 |
#include <sys/dmu_tx.h> |
|
38 |
#include <sys/zio_checksum.h> |
|
39 |
#include <sys/zap.h> |
|
40 |
#include <sys/zil.h> |
|
41 |
#include <sys/dmu_impl.h> |
|
42 |
||
43 |
||
44 |
spa_t * |
|
45 |
dmu_objset_spa(objset_t *os) |
|
46 |
{ |
|
47 |
return (os->os->os_spa); |
|
48 |
} |
|
49 |
||
50 |
zilog_t * |
|
51 |
dmu_objset_zil(objset_t *os) |
|
52 |
{ |
|
53 |
return (os->os->os_zil); |
|
54 |
} |
|
55 |
||
56 |
dsl_pool_t * |
|
57 |
dmu_objset_pool(objset_t *os) |
|
58 |
{ |
|
59 |
dsl_dataset_t *ds; |
|
60 |
||
61 |
if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) |
|
62 |
return (ds->ds_dir->dd_pool); |
|
63 |
else |
|
64 |
return (spa_get_dsl(os->os->os_spa)); |
|
65 |
} |
|
66 |
||
67 |
dsl_dataset_t * |
|
68 |
dmu_objset_ds(objset_t *os) |
|
69 |
{ |
|
70 |
return (os->os->os_dsl_dataset); |
|
71 |
} |
|
72 |
||
73 |
dmu_objset_type_t |
|
74 |
dmu_objset_type(objset_t *os) |
|
75 |
{ |
|
76 |
return (os->os->os_phys->os_type); |
|
77 |
} |
|
78 |
||
79 |
void |
|
80 |
dmu_objset_name(objset_t *os, char *buf) |
|
81 |
{ |
|
82 |
dsl_dataset_name(os->os->os_dsl_dataset, buf); |
|
83 |
} |
|
84 |
||
85 |
uint64_t |
|
86 |
dmu_objset_id(objset_t *os) |
|
87 |
{ |
|
88 |
dsl_dataset_t *ds = os->os->os_dsl_dataset; |
|
89 |
||
90 |
return (ds ? ds->ds_object : 0); |
|
91 |
} |
|
92 |
||
93 |
static void |
|
94 |
checksum_changed_cb(void *arg, uint64_t newval) |
|
95 |
{ |
|
96 |
objset_impl_t *osi = arg; |
|
97 |
||
98 |
/* |
|
99 |
* Inheritance should have been done by now. |
|
100 |
*/ |
|
101 |
ASSERT(newval != ZIO_CHECKSUM_INHERIT); |
|
102 |
||
103 |
osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); |
|
104 |
} |
|
105 |
||
106 |
static void |
|
107 |
compression_changed_cb(void *arg, uint64_t newval) |
|
108 |
{ |
|
109 |
objset_impl_t *osi = arg; |
|
110 |
||
111 |
/* |
|
112 |
* Inheritance and range checking should have been done by now. |
|
113 |
*/ |
|
114 |
ASSERT(newval != ZIO_COMPRESS_INHERIT); |
|
115 |
||
116 |
osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); |
|
117 |
} |
|
118 |
||
119 |
void |
|
120 |
dmu_objset_byteswap(void *buf, size_t size) |
|
121 |
{ |
|
122 |
objset_phys_t *osp = buf; |
|
123 |
||
124 |
ASSERT(size == sizeof (objset_phys_t)); |
|
125 |
dnode_byteswap(&osp->os_meta_dnode); |
|
126 |
byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); |
|
127 |
osp->os_type = BSWAP_64(osp->os_type); |
|
128 |
} |
|
129 |
||
130 |
objset_impl_t * |
|
131 |
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp) |
|
132 |
{ |
|
133 |
objset_impl_t *winner, *osi; |
|
134 |
int i, err, checksum; |
|
135 |
||
136 |
osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); |
|
137 |
osi->os.os = osi; |
|
138 |
osi->os_dsl_dataset = ds; |
|
139 |
osi->os_spa = spa; |
|
140 |
if (bp) |
|
141 |
osi->os_rootbp = *bp; |
|
142 |
osi->os_phys = zio_buf_alloc(sizeof (objset_phys_t)); |
|
143 |
if (!BP_IS_HOLE(&osi->os_rootbp)) { |
|
144 |
dprintf_bp(&osi->os_rootbp, "reading %s", ""); |
|
145 |
(void) arc_read(NULL, spa, &osi->os_rootbp, |
|
146 |
dmu_ot[DMU_OT_OBJSET].ot_byteswap, |
|
147 |
arc_bcopy_func, osi->os_phys, |
|
148 |
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT); |
|
149 |
} else { |
|
150 |
bzero(osi->os_phys, sizeof (objset_phys_t)); |
|
151 |
} |
|
152 |
osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); |
|
153 |
||
154 |
/* |
|
155 |
* Note: the changed_cb will be called once before the register |
|
156 |
* func returns, thus changing the checksum/compression from the |
|
157 |
* default (fletcher2/off). |
|
158 |
*/ |
|
159 |
if (ds) { |
|
160 |
err = dsl_prop_register(ds, "checksum", |
|
161 |
checksum_changed_cb, osi); |
|
162 |
ASSERT(err == 0); |
|
163 |
||
164 |
err = dsl_prop_register(ds, "compression", |
|
165 |
compression_changed_cb, osi); |
|
166 |
ASSERT(err == 0); |
|
167 |
} else { |
|
168 |
/* It's the meta-objset. */ |
|
982
9bc5c1db9740
6345547 assertion failed: tempreserve < arc.c/4 from zfs_rename
maybee
parents:
885
diff
changeset
|
169 |
/* XXX - turn off metadata compression temporarily */ |
789 | 170 |
osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; |
982
9bc5c1db9740
6345547 assertion failed: tempreserve < arc.c/4 from zfs_rename
maybee
parents:
885
diff
changeset
|
171 |
osi->os_compress = ZIO_COMPRESS_OFF; |
789 | 172 |
} |
173 |
||
174 |
/* |
|
175 |
* Metadata always gets compressed and checksummed. |
|
176 |
* If the data checksum is multi-bit correctable, and it's not |
|
177 |
* a ZBT-style checksum, then it's suitable for metadata as well. |
|
178 |
* Otherwise, the metadata checksum defaults to fletcher4. |
|
179 |
*/ |
|
180 |
checksum = osi->os_checksum; |
|
181 |
||
182 |
if (zio_checksum_table[checksum].ci_correctable && |
|
183 |
!zio_checksum_table[checksum].ci_zbt) |
|
184 |
osi->os_md_checksum = checksum; |
|
185 |
else |
|
186 |
osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; |
|
187 |
||
982
9bc5c1db9740
6345547 assertion failed: tempreserve < arc.c/4 from zfs_rename
maybee
parents:
885
diff
changeset
|
188 |
/* XXX - turn off metadata compression temporarily */ |
9bc5c1db9740
6345547 assertion failed: tempreserve < arc.c/4 from zfs_rename
maybee
parents:
885
diff
changeset
|
189 |
osi->os_md_compress = ZIO_COMPRESS_OFF; |
789 | 190 |
|
191 |
for (i = 0; i < TXG_SIZE; i++) { |
|
192 |
list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), |
|
193 |
offsetof(dnode_t, dn_dirty_link[i])); |
|
194 |
list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), |
|
195 |
offsetof(dnode_t, dn_dirty_link[i])); |
|
196 |
} |
|
197 |
list_create(&osi->os_dnodes, sizeof (dnode_t), |
|
198 |
offsetof(dnode_t, dn_link)); |
|
199 |
list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), |
|
200 |
offsetof(dmu_buf_impl_t, db_link)); |
|
201 |
||
202 |
osi->os_meta_dnode = dnode_special_open(osi, |
|
203 |
&osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); |
|
204 |
||
205 |
if (ds != NULL) { |
|
206 |
winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); |
|
207 |
if (winner) { |
|
208 |
dmu_objset_evict(ds, osi); |
|
209 |
osi = winner; |
|
210 |
} |
|
211 |
} |
|
212 |
||
213 |
return (osi); |
|
214 |
} |
|
215 |
||
216 |
/* called from zpl */ |
|
217 |
int |
|
218 |
dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, |
|
219 |
objset_t **osp) |
|
220 |
{ |
|
221 |
dsl_dataset_t *ds; |
|
222 |
int err; |
|
223 |
objset_t *os; |
|
224 |
objset_impl_t *osi; |
|
225 |
||
226 |
os = kmem_alloc(sizeof (objset_t), KM_SLEEP); |
|
227 |
err = dsl_dataset_open(name, mode, os, &ds); |
|
228 |
if (err) { |
|
229 |
kmem_free(os, sizeof (objset_t)); |
|
230 |
return (err); |
|
231 |
} |
|
232 |
||
233 |
osi = dsl_dataset_get_user_ptr(ds); |
|
234 |
if (osi == NULL) { |
|
235 |
blkptr_t bp; |
|
236 |
||
237 |
dsl_dataset_get_blkptr(ds, &bp); |
|
238 |
osi = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, &bp); |
|
239 |
} |
|
240 |
||
241 |
os->os = osi; |
|
242 |
os->os_mode = mode; |
|
243 |
||
244 |
if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { |
|
245 |
dmu_objset_close(os); |
|
246 |
return (EINVAL); |
|
247 |
} |
|
248 |
*osp = os; |
|
249 |
return (0); |
|
250 |
} |
|
251 |
||
252 |
void |
|
253 |
dmu_objset_close(objset_t *os) |
|
254 |
{ |
|
255 |
dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); |
|
256 |
kmem_free(os, sizeof (objset_t)); |
|
257 |
} |
|
258 |
||
259 |
void |
|
260 |
dmu_objset_evict(dsl_dataset_t *ds, void *arg) |
|
261 |
{ |
|
262 |
objset_impl_t *osi = arg; |
|
263 |
int err, i; |
|
264 |
||
265 |
for (i = 0; i < TXG_SIZE; i++) { |
|
266 |
ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); |
|
267 |
ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); |
|
268 |
} |
|
269 |
||
270 |
if (ds) { |
|
271 |
err = dsl_prop_unregister(ds, "checksum", |
|
272 |
checksum_changed_cb, osi); |
|
273 |
ASSERT(err == 0); |
|
274 |
||
275 |
err = dsl_prop_unregister(ds, "compression", |
|
276 |
compression_changed_cb, osi); |
|
277 |
ASSERT(err == 0); |
|
278 |
} |
|
279 |
||
280 |
ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode); |
|
281 |
ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode); |
|
282 |
ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL); |
|
283 |
||
284 |
dnode_special_close(osi->os_meta_dnode); |
|
285 |
zil_free(osi->os_zil); |
|
286 |
||
287 |
zio_buf_free(osi->os_phys, sizeof (objset_phys_t)); |
|
288 |
kmem_free(osi, sizeof (objset_impl_t)); |
|
289 |
} |
|
290 |
||
291 |
/* called from dsl for meta-objset */ |
|
292 |
objset_impl_t * |
|
293 |
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, dmu_objset_type_t type, |
|
294 |
dmu_tx_t *tx) |
|
295 |
{ |
|
296 |
objset_impl_t *osi; |
|
297 |
dnode_t *mdn; |
|
298 |
||
299 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
300 |
osi = dmu_objset_open_impl(spa, ds, NULL); |
|
301 |
mdn = osi->os_meta_dnode; |
|
302 |
||
303 |
dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, |
|
304 |
DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); |
|
305 |
||
306 |
/* |
|
307 |
* We don't want to have to increase the meta-dnode's nlevels |
|
308 |
* later, because then we could do it in quescing context while |
|
309 |
* we are also accessing it in open context. |
|
310 |
* |
|
311 |
* This precaution is not necessary for the MOS (ds == NULL), |
|
312 |
* because the MOS is only updated in syncing context. |
|
313 |
* This is most fortunate: the MOS is the only objset that |
|
314 |
* needs to be synced multiple times as spa_sync() iterates |
|
315 |
* to convergence, so minimizing its dn_nlevels matters. |
|
316 |
*/ |
|
317 |
if (ds != NULL) |
|
318 |
mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = |
|
319 |
mdn->dn_nlevels = DN_META_DNODE_LEVELS; |
|
320 |
||
321 |
ASSERT(type != DMU_OST_NONE); |
|
322 |
ASSERT(type != DMU_OST_ANY); |
|
323 |
ASSERT(type < DMU_OST_NUMTYPES); |
|
324 |
osi->os_phys->os_type = type; |
|
325 |
||
326 |
dsl_dataset_dirty(ds, tx); |
|
327 |
||
328 |
return (osi); |
|
329 |
} |
|
330 |
||
331 |
struct oscarg { |
|
332 |
void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); |
|
333 |
void *userarg; |
|
334 |
dsl_dataset_t *clone_parent; |
|
335 |
const char *fullname; |
|
336 |
const char *lastname; |
|
337 |
dmu_objset_type_t type; |
|
338 |
}; |
|
339 |
||
340 |
static int |
|
341 |
dmu_objset_create_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) |
|
342 |
{ |
|
343 |
struct oscarg *oa = arg; |
|
344 |
dsl_dataset_t *ds; |
|
345 |
int err; |
|
346 |
blkptr_t bp; |
|
347 |
||
348 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
349 |
||
350 |
err = dsl_dataset_create_sync(dd, oa->fullname, oa->lastname, |
|
351 |
oa->clone_parent, tx); |
|
352 |
dprintf_dd(dd, "fn=%s ln=%s err=%d\n", |
|
353 |
oa->fullname, oa->lastname, err); |
|
354 |
if (err) |
|
355 |
return (err); |
|
356 |
||
357 |
err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, oa->fullname, |
|
358 |
DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds); |
|
359 |
ASSERT3U(err, ==, 0); |
|
360 |
dsl_dataset_get_blkptr(ds, &bp); |
|
361 |
if (BP_IS_HOLE(&bp)) { |
|
362 |
objset_impl_t *osi; |
|
363 |
||
364 |
/* This is an empty dmu_objset; not a clone. */ |
|
365 |
osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), |
|
366 |
ds, oa->type, tx); |
|
367 |
||
368 |
if (oa->userfunc) |
|
369 |
oa->userfunc(&osi->os, oa->userarg, tx); |
|
370 |
} |
|
371 |
dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); |
|
372 |
||
373 |
return (0); |
|
374 |
} |
|
375 |
||
376 |
int |
|
377 |
dmu_objset_create(const char *name, dmu_objset_type_t type, |
|
378 |
objset_t *clone_parent, |
|
379 |
void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) |
|
380 |
{ |
|
381 |
dsl_dir_t *pds; |
|
382 |
const char *tail; |
|
383 |
int err = 0; |
|
384 |
||
385 |
pds = dsl_dir_open(name, FTAG, &tail); |
|
386 |
if (pds == NULL) |
|
387 |
return (ENOENT); |
|
388 |
if (tail == NULL) { |
|
389 |
dsl_dir_close(pds, FTAG); |
|
390 |
return (EEXIST); |
|
391 |
} |
|
392 |
||
393 |
dprintf("name=%s\n", name); |
|
394 |
||
395 |
if (tail[0] == '@') { |
|
396 |
/* |
|
397 |
* If we're creating a snapshot, make sure everything |
|
398 |
* they might want is on disk. XXX Sketchy to know |
|
399 |
* about snapshots here, better to put in DSL. |
|
400 |
*/ |
|
401 |
objset_t *os; |
|
402 |
size_t plen = strchr(name, '@') - name + 1; |
|
403 |
char *pbuf = kmem_alloc(plen, KM_SLEEP); |
|
404 |
bcopy(name, pbuf, plen - 1); |
|
405 |
pbuf[plen - 1] = '\0'; |
|
406 |
||
407 |
err = dmu_objset_open(pbuf, DMU_OST_ANY, DS_MODE_STANDARD, &os); |
|
408 |
if (err == 0) { |
|
409 |
err = zil_suspend(dmu_objset_zil(os)); |
|
410 |
if (err == 0) { |
|
411 |
err = dsl_dir_sync_task(pds, |
|
412 |
dsl_dataset_snapshot_sync, |
|
413 |
(void*)(tail+1), 16*1024); |
|
414 |
zil_resume(dmu_objset_zil(os)); |
|
415 |
} |
|
416 |
dmu_objset_close(os); |
|
417 |
} |
|
418 |
kmem_free(pbuf, plen); |
|
419 |
} else { |
|
420 |
struct oscarg oa = { 0 }; |
|
421 |
oa.userfunc = func; |
|
422 |
oa.userarg = arg; |
|
423 |
oa.fullname = name; |
|
424 |
oa.lastname = tail; |
|
425 |
oa.type = type; |
|
426 |
if (clone_parent != NULL) { |
|
427 |
/* |
|
428 |
* You can't clone to a different type. |
|
429 |
*/ |
|
430 |
if (clone_parent->os->os_phys->os_type != type) { |
|
431 |
dsl_dir_close(pds, FTAG); |
|
432 |
return (EINVAL); |
|
433 |
} |
|
434 |
oa.clone_parent = clone_parent->os->os_dsl_dataset; |
|
435 |
} |
|
436 |
err = dsl_dir_sync_task(pds, dmu_objset_create_sync, &oa, |
|
437 |
256*1024); |
|
438 |
} |
|
439 |
dsl_dir_close(pds, FTAG); |
|
440 |
return (err); |
|
441 |
} |
|
442 |
||
443 |
int |
|
444 |
dmu_objset_destroy(const char *name) |
|
445 |
{ |
|
446 |
objset_t *os; |
|
447 |
int error; |
|
448 |
||
449 |
/* |
|
450 |
* If it looks like we'll be able to destroy it, and there's |
|
451 |
* an unplayed replay log sitting around, destroy the log. |
|
452 |
* It would be nicer to do this in dsl_dataset_destroy_sync(), |
|
453 |
* but the replay log objset is modified in open context. |
|
454 |
*/ |
|
455 |
error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); |
|
456 |
if (error == 0) { |
|
457 |
zil_destroy(dmu_objset_zil(os)); |
|
458 |
dmu_objset_close(os); |
|
459 |
} |
|
460 |
||
461 |
/* XXX uncache everything? */ |
|
462 |
return (dsl_dataset_destroy(name)); |
|
463 |
} |
|
464 |
||
465 |
int |
|
466 |
dmu_objset_rollback(const char *name) |
|
467 |
{ |
|
468 |
int err; |
|
469 |
objset_t *os; |
|
470 |
||
471 |
err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); |
|
472 |
if (err == 0) { |
|
473 |
err = zil_suspend(dmu_objset_zil(os)); |
|
474 |
if (err == 0) |
|
475 |
zil_resume(dmu_objset_zil(os)); |
|
476 |
dmu_objset_close(os); |
|
477 |
if (err == 0) { |
|
478 |
/* XXX uncache everything? */ |
|
479 |
err = dsl_dataset_rollback(name); |
|
480 |
} |
|
481 |
} |
|
482 |
return (err); |
|
483 |
} |
|
484 |
||
485 |
static void |
|
486 |
dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, dmu_tx_t *tx) |
|
487 |
{ |
|
488 |
dnode_t *dn = list_head(list); |
|
489 |
int level, err; |
|
490 |
||
491 |
for (level = 0; dn = list_head(list); level++) { |
|
492 |
zio_t *zio; |
|
493 |
zio = zio_root(os->os_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); |
|
494 |
||
495 |
ASSERT3U(level, <=, DN_MAX_LEVELS); |
|
496 |
||
497 |
while (dn) { |
|
498 |
dnode_t *next = list_next(list, dn); |
|
499 |
||
500 |
list_remove(list, dn); |
|
501 |
if (dnode_sync(dn, level, zio, tx) == 0) { |
|
502 |
/* |
|
503 |
* This dnode requires syncing at higher |
|
504 |
* levels; put it back onto the list. |
|
505 |
*/ |
|
506 |
if (next) |
|
507 |
list_insert_before(list, next, dn); |
|
508 |
else |
|
509 |
list_insert_tail(list, dn); |
|
510 |
} |
|
511 |
dn = next; |
|
512 |
} |
|
513 |
err = zio_wait(zio); |
|
514 |
ASSERT(err == 0); |
|
515 |
} |
|
516 |
} |
|
517 |
||
518 |
/* ARGSUSED */ |
|
519 |
static void |
|
520 |
killer(zio_t *zio, arc_buf_t *abuf, void *arg) |
|
521 |
{ |
|
522 |
objset_impl_t *os = arg; |
|
523 |
objset_phys_t *osphys = zio->io_data; |
|
524 |
dnode_phys_t *dnp = &osphys->os_meta_dnode; |
|
525 |
int i; |
|
526 |
||
527 |
ASSERT3U(zio->io_error, ==, 0); |
|
528 |
||
529 |
/* |
|
530 |
* Update rootbp fill count. |
|
531 |
*/ |
|
532 |
os->os_rootbp.blk_fill = 1; /* count the meta-dnode */ |
|
533 |
for (i = 0; i < dnp->dn_nblkptr; i++) |
|
534 |
os->os_rootbp.blk_fill += dnp->dn_blkptr[i].blk_fill; |
|
535 |
||
536 |
BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET); |
|
537 |
BP_SET_LEVEL(zio->io_bp, 0); |
|
538 |
||
539 |
if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), |
|
540 |
BP_IDENTITY(&zio->io_bp_orig))) { |
|
541 |
dsl_dataset_block_kill(os->os_dsl_dataset, &zio->io_bp_orig, |
|
542 |
os->os_synctx); |
|
543 |
dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp, |
|
544 |
os->os_synctx); |
|
545 |
} |
|
546 |
} |
|
547 |
||
548 |
||
549 |
/* called from dsl */ |
|
550 |
void |
|
551 |
dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx) |
|
552 |
{ |
|
553 |
extern taskq_t *dbuf_tq; |
|
554 |
int txgoff; |
|
555 |
list_t *dirty_list; |
|
556 |
int err; |
|
557 |
arc_buf_t *abuf = |
|
558 |
arc_buf_alloc(os->os_spa, sizeof (objset_phys_t), FTAG); |
|
559 |
||
560 |
ASSERT(dmu_tx_is_syncing(tx)); |
|
561 |
ASSERT(os->os_synctx == NULL); |
|
562 |
/* XXX the write_done callback should really give us the tx... */ |
|
563 |
os->os_synctx = tx; |
|
564 |
||
565 |
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); |
|
566 |
||
567 |
txgoff = tx->tx_txg & TXG_MASK; |
|
568 |
||
569 |
dmu_objset_sync_dnodes(os, &os->os_free_dnodes[txgoff], tx); |
|
570 |
dmu_objset_sync_dnodes(os, &os->os_dirty_dnodes[txgoff], tx); |
|
571 |
||
572 |
/* |
|
573 |
* Free intent log blocks up to this tx. |
|
574 |
*/ |
|
575 |
zil_sync(os->os_zil, tx); |
|
576 |
||
577 |
/* |
|
578 |
* Sync meta-dnode |
|
579 |
*/ |
|
580 |
dirty_list = &os->os_dirty_dnodes[txgoff]; |
|
581 |
ASSERT(list_head(dirty_list) == NULL); |
|
582 |
list_insert_tail(dirty_list, os->os_meta_dnode); |
|
583 |
dmu_objset_sync_dnodes(os, dirty_list, tx); |
|
584 |
||
585 |
/* |
|
586 |
* Sync the root block. |
|
587 |
*/ |
|
588 |
bcopy(os->os_phys, abuf->b_data, sizeof (objset_phys_t)); |
|
589 |
err = arc_write(NULL, os->os_spa, os->os_md_checksum, |
|
590 |
os->os_md_compress, tx->tx_txg, &os->os_rootbp, abuf, killer, os, |
|
591 |
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT); |
|
592 |
ASSERT(err == 0); |
|
593 |
arc_buf_free(abuf, FTAG); |
|
594 |
||
595 |
dsl_dataset_set_blkptr(os->os_dsl_dataset, &os->os_rootbp, tx); |
|
596 |
||
597 |
ASSERT3P(os->os_synctx, ==, tx); |
|
598 |
taskq_wait(dbuf_tq); |
|
599 |
os->os_synctx = NULL; |
|
600 |
} |
|
601 |
||
602 |
void |
|
603 |
dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds) |
|
604 |
{ |
|
605 |
if (os->os->os_dsl_dataset != NULL) { |
|
606 |
dsl_dataset_stats(os->os->os_dsl_dataset, dds); |
|
607 |
} else { |
|
608 |
ASSERT(os->os->os_phys->os_type == DMU_OST_META); |
|
609 |
bzero(dds, sizeof (*dds)); |
|
610 |
} |
|
611 |
dds->dds_type = os->os->os_phys->os_type; |
|
612 |
} |
|
613 |
||
614 |
int |
|
615 |
dmu_objset_is_snapshot(objset_t *os) |
|
616 |
{ |
|
617 |
if (os->os->os_dsl_dataset != NULL) |
|
618 |
return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); |
|
619 |
else |
|
620 |
return (B_FALSE); |
|
621 |
} |
|
622 |
||
623 |
int |
|
624 |
dmu_snapshot_list_next(objset_t *os, int namelen, char *name, |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
625 |
uint64_t *idp, uint64_t *offp) |
789 | 626 |
{ |
627 |
dsl_dataset_t *ds = os->os->os_dsl_dataset; |
|
628 |
zap_cursor_t cursor; |
|
629 |
zap_attribute_t attr; |
|
630 |
||
631 |
if (ds->ds_phys->ds_snapnames_zapobj == 0) |
|
632 |
return (ENOENT); |
|
633 |
||
634 |
zap_cursor_init_serialized(&cursor, |
|
635 |
ds->ds_dir->dd_pool->dp_meta_objset, |
|
636 |
ds->ds_phys->ds_snapnames_zapobj, *offp); |
|
637 |
||
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
638 |
if (zap_cursor_retrieve(&cursor, &attr) != 0) { |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
639 |
zap_cursor_fini(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
640 |
return (ENOENT); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
641 |
} |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
642 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
643 |
if (strlen(attr.za_name) + 1 > namelen) { |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
644 |
zap_cursor_fini(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
645 |
return (ENAMETOOLONG); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
646 |
} |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
647 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
648 |
(void) strcpy(name, attr.za_name); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
649 |
if (idp) |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
650 |
*idp = attr.za_first_integer; |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
651 |
zap_cursor_advance(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
652 |
*offp = zap_cursor_serialize(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
653 |
zap_cursor_fini(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
654 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
655 |
return (0); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
656 |
} |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
657 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
658 |
int |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
659 |
dmu_dir_list_next(objset_t *os, int namelen, char *name, |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
660 |
uint64_t *idp, uint64_t *offp) |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
661 |
{ |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
662 |
dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
663 |
zap_cursor_t cursor; |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
664 |
zap_attribute_t attr; |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
665 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
666 |
if (dd->dd_phys->dd_child_dir_zapobj == 0) |
789 | 667 |
return (ENOENT); |
668 |
||
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
669 |
/* there is no next dir on a snapshot! */ |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
670 |
if (os->os->os_dsl_dataset->ds_object != |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
671 |
dd->dd_phys->dd_head_dataset_obj) |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
672 |
return (ENOENT); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
673 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
674 |
zap_cursor_init_serialized(&cursor, |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
675 |
dd->dd_pool->dp_meta_objset, |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
676 |
dd->dd_phys->dd_child_dir_zapobj, *offp); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
677 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
678 |
if (zap_cursor_retrieve(&cursor, &attr) != 0) { |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
679 |
zap_cursor_fini(&cursor); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
680 |
return (ENOENT); |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
681 |
} |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
682 |
|
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
683 |
if (strlen(attr.za_name) + 1 > namelen) { |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
684 |
zap_cursor_fini(&cursor); |
789 | 685 |
return (ENAMETOOLONG); |
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
686 |
} |
789 | 687 |
|
688 |
(void) strcpy(name, attr.za_name); |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
689 |
if (idp) |
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
690 |
*idp = attr.za_first_integer; |
789 | 691 |
zap_cursor_advance(&cursor); |
692 |
*offp = zap_cursor_serialize(&cursor); |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
693 |
zap_cursor_fini(&cursor); |
789 | 694 |
|
695 |
return (0); |
|
696 |
} |
|
697 |
||
698 |
/* |
|
699 |
* Find all objsets under name, and for each, call 'func(child_name, arg)'. |
|
700 |
*/ |
|
701 |
void |
|
702 |
dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) |
|
703 |
{ |
|
704 |
dsl_dir_t *dd; |
|
705 |
objset_t *os; |
|
706 |
uint64_t snapobj; |
|
707 |
zap_cursor_t zc; |
|
708 |
zap_attribute_t attr; |
|
709 |
char *child; |
|
710 |
int do_self; |
|
711 |
||
712 |
dd = dsl_dir_open(name, FTAG, NULL); |
|
713 |
if (dd == NULL) |
|
714 |
return; |
|
715 |
||
716 |
do_self = (dd->dd_phys->dd_head_dataset_obj != 0); |
|
717 |
||
718 |
/* |
|
719 |
* Iterate over all children. |
|
720 |
*/ |
|
721 |
if (dd->dd_phys->dd_child_dir_zapobj != 0) { |
|
722 |
for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, |
|
723 |
dd->dd_phys->dd_child_dir_zapobj); |
|
724 |
zap_cursor_retrieve(&zc, &attr) == 0; |
|
725 |
(void) zap_cursor_advance(&zc)) { |
|
726 |
ASSERT(attr.za_integer_length == sizeof (uint64_t)); |
|
727 |
ASSERT(attr.za_num_integers == 1); |
|
728 |
||
729 |
/* |
|
730 |
* No separating '/' because parent's name ends in /. |
|
731 |
*/ |
|
732 |
child = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
|
733 |
/* XXX could probably just use name here */ |
|
734 |
dsl_dir_name(dd, child); |
|
735 |
(void) strcat(child, "/"); |
|
736 |
(void) strcat(child, attr.za_name); |
|
737 |
dmu_objset_find(child, func, arg, flags); |
|
738 |
kmem_free(child, MAXPATHLEN); |
|
739 |
} |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
740 |
zap_cursor_fini(&zc); |
789 | 741 |
} |
742 |
||
743 |
/* |
|
744 |
* Iterate over all snapshots. |
|
745 |
*/ |
|
746 |
if ((flags & DS_FIND_SNAPSHOTS) && |
|
747 |
dmu_objset_open(name, DMU_OST_ANY, |
|
748 |
DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) { |
|
749 |
||
750 |
snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj; |
|
751 |
dmu_objset_close(os); |
|
752 |
||
753 |
for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj); |
|
754 |
zap_cursor_retrieve(&zc, &attr) == 0; |
|
755 |
(void) zap_cursor_advance(&zc)) { |
|
756 |
ASSERT(attr.za_integer_length == sizeof (uint64_t)); |
|
757 |
ASSERT(attr.za_num_integers == 1); |
|
758 |
||
759 |
child = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
|
760 |
/* XXX could probably just use name here */ |
|
761 |
dsl_dir_name(dd, child); |
|
762 |
(void) strcat(child, "@"); |
|
763 |
(void) strcat(child, attr.za_name); |
|
764 |
func(child, arg); |
|
765 |
kmem_free(child, MAXPATHLEN); |
|
766 |
} |
|
885
d925b21dba78
6347493 tar of 25K empty directory entries in ZFS takes 30+ seconds ...
ahrens
parents:
789
diff
changeset
|
767 |
zap_cursor_fini(&zc); |
789 | 768 |
} |
769 |
||
770 |
dsl_dir_close(dd, FTAG); |
|
771 |
||
772 |
/* |
|
773 |
* Apply to self if appropriate. |
|
774 |
*/ |
|
775 |
if (do_self) |
|
776 |
func(name, arg); |
|
777 |
} |