author | eschrock |
Tue, 16 May 2006 11:20:11 -0700 | |
changeset 1986 | 628267397204 |
parent 1807 | 35c8b566d7af |
child 2981 | b80f5da0b8ed |
permissions | -rw-r--r-- |
789 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1544 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
789 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1544 | 22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#ifndef _ZIO_H |
|
27 |
#define _ZIO_H |
|
28 |
||
29 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
30 |
||
31 |
#include <sys/zfs_context.h> |
|
32 |
#include <sys/spa.h> |
|
33 |
#include <sys/txg.h> |
|
34 |
#include <sys/avl.h> |
|
35 |
#include <sys/dkio.h> |
|
36 |
#include <sys/fs/zfs.h> |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
37 |
#include <sys/zio_impl.h> |
789 | 38 |
|
39 |
#ifdef __cplusplus |
|
40 |
extern "C" { |
|
41 |
#endif |
|
42 |
||
43 |
/* The hex digits read "zio data bloc tail"; the spelling "bloc" is deliberate. */
#define	ZBT_MAGIC	0x210da7ab10c7a11ULL	/* zio data bloc tail */
|
44 |
||
45 |
typedef struct zio_block_tail { |
|
46 |
uint64_t zbt_magic; /* for validation, endianness */ |
|
47 |
zio_cksum_t zbt_cksum; /* 256-bit checksum */ |
|
48 |
} zio_block_tail_t; |
|
49 |
||
50 |
/*
 * Gang block headers are self-checksumming and contain an array
 * of block pointers.
 *
 * SPA_GBH_NBLKPTRS is the number of whole blkptr_t entries that fit in
 * SPA_GANGBLOCKSIZE once room for a zio_block_tail_t has been set aside.
 * SPA_GBH_FILLER is whatever space is then left over, expressed as a
 * count of uint64_t words (integer division, so any remainder smaller
 * than one word is not counted).
 */
#define	SPA_GANGBLOCKSIZE	SPA_MINBLOCKSIZE
#define	SPA_GBH_NBLKPTRS	((SPA_GANGBLOCKSIZE - \
	sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
#define	SPA_GBH_FILLER		((SPA_GANGBLOCKSIZE - \
	sizeof (zio_block_tail_t) - \
	(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
	sizeof (uint64_t))
|
61 |
||
62 |
/*
 * Size of the I/O for a zio: SPA_GANGBLOCKSIZE when the zio's block
 * pointer is a gang block, otherwise the block pointer's PSIZE.
 * Note that the argument is evaluated twice.
 */
#define	ZIO_GET_IOSIZE(zio)	\
	(BP_IS_GANG((zio)->io_bp) ? \
	SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
65 |
||
66 |
typedef struct zio_gbh { |
|
67 |
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; |
|
68 |
uint64_t zg_filler[SPA_GBH_FILLER]; |
|
69 |
zio_block_tail_t zg_tail; |
|
70 |
} zio_gbh_phys_t; |
|
71 |
||
72 |
/*
 * Checksum selectors.  Values are assigned sequentially from
 * ZIO_CHECKSUM_INHERIT (0); ZIO_CHECKSUM_FUNCTIONS is the final
 * enumerator and therefore equals the number of selectors before it.
 */
enum zio_checksum {
	ZIO_CHECKSUM_INHERIT = 0,
	ZIO_CHECKSUM_ON,
	ZIO_CHECKSUM_OFF,
	ZIO_CHECKSUM_LABEL,
	ZIO_CHECKSUM_GANG_HEADER,
	ZIO_CHECKSUM_ZILOG,
	ZIO_CHECKSUM_FLETCHER_2,
	ZIO_CHECKSUM_FLETCHER_4,
	ZIO_CHECKSUM_SHA256,
	ZIO_CHECKSUM_FUNCTIONS
};

/* The selector that "on" stands for, and the default selector. */
#define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_2
#define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON
|
87 |
||
88 |
/*
 * Compression selectors.  Values are assigned sequentially from
 * ZIO_COMPRESS_INHERIT (0); ZIO_COMPRESS_FUNCTIONS is the final
 * enumerator and therefore equals the number of selectors before it.
 */
enum zio_compress {
	ZIO_COMPRESS_INHERIT = 0,
	ZIO_COMPRESS_ON,
	ZIO_COMPRESS_OFF,
	ZIO_COMPRESS_LZJB,
	ZIO_COMPRESS_FUNCTIONS
};

/* The selector that "on" stands for, and the default selector. */
#define	ZIO_COMPRESS_ON_VALUE	ZIO_COMPRESS_LZJB
#define	ZIO_COMPRESS_DEFAULT	ZIO_COMPRESS_OFF
|
98 |
||
99 |
/*
 * Each ZIO_PRIORITY_* name expands to one slot of zio_priority_table[],
 * so its value is read from the table at run time rather than being a
 * compile-time constant.  ZIO_PRIORITY_TABLE_SIZE is the number of slots
 * and must stay one greater than the highest index used here.
 */
#define	ZIO_PRIORITY_NOW		(zio_priority_table[0])
#define	ZIO_PRIORITY_SYNC_READ		(zio_priority_table[1])
#define	ZIO_PRIORITY_SYNC_WRITE		(zio_priority_table[2])
#define	ZIO_PRIORITY_ASYNC_READ		(zio_priority_table[3])
#define	ZIO_PRIORITY_ASYNC_WRITE	(zio_priority_table[4])
#define	ZIO_PRIORITY_FREE		(zio_priority_table[5])
#define	ZIO_PRIORITY_CACHE_FILL		(zio_priority_table[6])
#define	ZIO_PRIORITY_LOG_WRITE		(zio_priority_table[7])
#define	ZIO_PRIORITY_RESILVER		(zio_priority_table[8])
#define	ZIO_PRIORITY_SCRUB		(zio_priority_table[9])
#define	ZIO_PRIORITY_TABLE_SIZE		10
|
110 |
||
1544 | 111 |
/*
 * ZIO flag bits.  Every flag is a distinct single bit except
 * ZIO_FLAG_MUSTSUCCEED, which is zero (no bits set), so it cannot be
 * tested with a bitwise AND.  Bit 0x00008 is not assigned here.
 */
#define	ZIO_FLAG_MUSTSUCCEED		0x00000
#define	ZIO_FLAG_CANFAIL		0x00001
#define	ZIO_FLAG_FAILFAST		0x00002
#define	ZIO_FLAG_CONFIG_HELD		0x00004

#define	ZIO_FLAG_DONT_CACHE		0x00010
#define	ZIO_FLAG_DONT_QUEUE		0x00020
#define	ZIO_FLAG_DONT_PROPAGATE		0x00040
#define	ZIO_FLAG_DONT_RETRY		0x00080

#define	ZIO_FLAG_PHYSICAL		0x00100
#define	ZIO_FLAG_IO_BYPASS		0x00200
#define	ZIO_FLAG_IO_REPAIR		0x00400
#define	ZIO_FLAG_SPECULATIVE		0x00800

#define	ZIO_FLAG_RESILVER		0x01000
#define	ZIO_FLAG_SCRUB			0x02000
#define	ZIO_FLAG_SCRUB_THREAD		0x04000
#define	ZIO_FLAG_SUBBLOCK		0x08000

#define	ZIO_FLAG_NOBOOKMARK		0x10000

/*
 * Mask of the flags listed below.
 * NOTE(review): the name suggests these are the flags passed down to
 * gang children; confirm against the users of this mask.
 */
#define	ZIO_FLAG_GANG_INHERIT		\
	(ZIO_FLAG_CANFAIL |		\
	ZIO_FLAG_FAILFAST |		\
	ZIO_FLAG_CONFIG_HELD |		\
	ZIO_FLAG_DONT_RETRY |		\
	ZIO_FLAG_IO_REPAIR |		\
	ZIO_FLAG_SPECULATIVE |		\
	ZIO_FLAG_RESILVER |		\
	ZIO_FLAG_SCRUB |		\
	ZIO_FLAG_SCRUB_THREAD)

/*
 * Everything in ZIO_FLAG_GANG_INHERIT plus ZIO_FLAG_DONT_CACHE and
 * ZIO_FLAG_PHYSICAL.
 * NOTE(review): presumably the flags passed down to vdev child I/Os;
 * confirm against the users of this mask.
 */
#define	ZIO_FLAG_VDEV_INHERIT		\
	(ZIO_FLAG_GANG_INHERIT |	\
	ZIO_FLAG_DONT_CACHE |		\
	ZIO_FLAG_PHYSICAL)
|
148 |
||
149 |
/*
 * We'll take the unused errno 'EBADE' (from the Convergent graveyard)
 * to indicate checksum errors.
 */
#define	ECKSUM	EBADE
|
154 |
||
155 |
/*
 * zio_t names struct zio.  zio_done_func_t is the function type
 * (not a pointer type) used for the "done" arguments in this header:
 * it takes the zio and returns nothing.
 */
typedef struct zio zio_t;
typedef void zio_done_func_t(zio_t *zio);
|
157 |
||
158 |
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE]; |
|
159 |
extern char *zio_type_name[ZIO_TYPES]; |
|
160 |
||
1544 | 161 |
/*
 * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
 * identifies any block in the pool.  By convention, the meta-objset (MOS)
 * is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
 * level -1 of the meta-dnode, and intent log blocks (which are chained
 * off the root block) have blkid == sequence number.  In summary:
 *
 *	mos is objset 0
 *	meta-dnode is object 0
 *	root block is <objset, 0, -1, 0>
 *	intent log is <objset, 0, -1, ZIL sequence number>
 *
 * Note: this structure is called a bookmark because its first purpose was
 * to remember where to resume a pool-wide traverse.  The absolute ordering
 * for block visitation during traversal is defined in compare_bookmark().
 *
 * Note: this structure is passed between userland and the kernel.
 * Therefore it must not change size or alignment between 32/64 bit
 * compilation options.  All four members are 64 bits wide for that
 * reason; zb_level is the only signed one because it can be -1.
 */
typedef struct zbookmark {
	uint64_t	zb_objset;
	uint64_t	zb_object;
	int64_t		zb_level;
	uint64_t	zb_blkid;
} zbookmark_t;
|
187 |
||
789 | 188 |
struct zio { |
189 |
/* Core information about this I/O */ |
|
190 |
zio_t *io_parent; |
|
191 |
zio_t *io_root; |
|
192 |
spa_t *io_spa; |
|
1544 | 193 |
zbookmark_t io_bookmark; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
194 |
enum zio_checksum io_checksum; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
195 |
enum zio_compress io_compress; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
196 |
int io_ndvas; |
789 | 197 |
uint64_t io_txg; |
198 |
blkptr_t *io_bp; |
|
199 |
blkptr_t io_bp_copy; |
|
200 |
zio_t *io_child; |
|
201 |
zio_t *io_sibling_prev; |
|
202 |
zio_t *io_sibling_next; |
|
203 |
zio_transform_t *io_transform_stack; |
|
1544 | 204 |
zio_t *io_logical; |
789 | 205 |
|
206 |
/* Callback info */ |
|
207 |
zio_done_func_t *io_done; |
|
208 |
void *io_private; |
|
209 |
blkptr_t io_bp_orig; |
|
210 |
||
211 |
/* Data represented by this I/O */ |
|
212 |
void *io_data; |
|
213 |
uint64_t io_size; |
|
214 |
||
215 |
/* Stuff for the vdev stack */ |
|
216 |
vdev_t *io_vd; |
|
217 |
void *io_vsd; |
|
218 |
uint64_t io_offset; |
|
219 |
uint64_t io_deadline; |
|
220 |
uint64_t io_timestamp; |
|
221 |
avl_node_t io_offset_node; |
|
222 |
avl_node_t io_deadline_node; |
|
223 |
avl_tree_t *io_vdev_tree; |
|
224 |
zio_t *io_delegate_list; |
|
225 |
zio_t *io_delegate_next; |
|
226 |
||
227 |
/* Internal pipeline state */ |
|
228 |
int io_flags; |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
229 |
enum zio_type io_type; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
230 |
enum zio_stage io_stage; |
789 | 231 |
uint8_t io_stalled; |
232 |
uint8_t io_priority; |
|
233 |
struct dk_callback io_dk_callback; |
|
234 |
int io_cmd; |
|
235 |
int io_retries; |
|
236 |
int io_error; |
|
237 |
uint32_t io_numerrors; |
|
238 |
uint32_t io_pipeline; |
|
239 |
uint32_t io_async_stages; |
|
240 |
uint64_t io_children_notready; |
|
241 |
uint64_t io_children_notdone; |
|
242 |
void *io_waiter; |
|
243 |
kmutex_t io_lock; |
|
244 |
kcondvar_t io_cv; |
|
1544 | 245 |
|
246 |
/* FMA state */ |
|
247 |
uint64_t io_ena; |
|
789 | 248 |
}; |
249 |
||
250 |
extern zio_t *zio_null(zio_t *pio, spa_t *spa, |
|
251 |
zio_done_func_t *done, void *private, int flags); |
|
252 |
||
253 |
extern zio_t *zio_root(spa_t *spa, |
|
254 |
zio_done_func_t *done, void *private, int flags); |
|
255 |
||
256 |
extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data, |
|
257 |
uint64_t size, zio_done_func_t *done, void *private, |
|
1544 | 258 |
int priority, int flags, zbookmark_t *zb); |
789 | 259 |
|
260 |
extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress, |
|
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
261 |
int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size, |
1544 | 262 |
zio_done_func_t *done, void *private, int priority, int flags, |
263 |
zbookmark_t *zb); |
|
789 | 264 |
|
265 |
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum, |
|
266 |
uint64_t txg, blkptr_t *bp, void *data, uint64_t size, |
|
1544 | 267 |
zio_done_func_t *done, void *private, int priority, int flags, |
268 |
zbookmark_t *zb); |
|
789 | 269 |
|
270 |
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
|
271 |
zio_done_func_t *done, void *private); |
|
272 |
||
273 |
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, |
|
274 |
zio_done_func_t *done, void *private); |
|
275 |
||
276 |
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, |
|
277 |
zio_done_func_t *done, void *private, int priority, int flags); |
|
278 |
||
279 |
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, |
|
280 |
uint64_t size, void *data, int checksum, |
|
281 |
zio_done_func_t *done, void *private, int priority, int flags); |
|
282 |
||
283 |
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, |
|
284 |
uint64_t size, void *data, int checksum, |
|
285 |
zio_done_func_t *done, void *private, int priority, int flags); |
|
286 |
||
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
287 |
/*
 * Allocate / free a single block described by bp in the given txg.
 * zio_alloc_blk() returns an int status; zio_free_blk() returns nothing.
 * Both are defined outside this header.
 */
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *bp, uint64_t txg);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
289 |
||
290 |
/*
 * Two ways to hand a zio over: zio_wait() returns an int result,
 * zio_nowait() returns nothing.  Both are defined outside this header.
 */
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
|
292 |
||
293 |
/*
 * Data buffer allocation, defined outside this header.  The size is
 * passed to zio_buf_free() as well as to zio_buf_alloc().
 */
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
|
295 |
||
296 |
/* |
|
297 |
* Move an I/O to the next stage of the pipeline and execute that stage. |
|
298 |
* There's no locking on io_stage because there's no legitimate way for |
|
299 |
* multiple threads to be attempting to process the same I/O. |
|
300 |
*/ |
|
301 |
extern void zio_next_stage(zio_t *zio); |
|
302 |
extern void zio_next_stage_async(zio_t *zio); |
|
303 |
extern void zio_wait_children_done(zio_t *zio); |
|
304 |
||
305 |
/* |
|
306 |
* Delegate I/O to a child vdev. |
|
307 |
*/ |
|
308 |
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, |
|
309 |
uint64_t offset, void *data, uint64_t size, int type, int priority, |
|
310 |
int flags, zio_done_func_t *done, void *private); |
|
311 |
||
312 |
extern void zio_vdev_io_bypass(zio_t *zio); |
|
313 |
extern void zio_vdev_io_reissue(zio_t *zio); |
|
314 |
extern void zio_vdev_io_redone(zio_t *zio); |
|
315 |
||
316 |
extern void zio_checksum_verified(zio_t *zio); |
|
317 |
extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp); |
|
318 |
||
319 |
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent); |
|
320 |
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent); |
|
321 |
||
1544 | 322 |
/* Declared extern to match every other prototype in this header. */
extern boolean_t zio_should_retry(zio_t *zio);
323 |
||
789 | 324 |
/*
 * Initial setup and teardown.
 */
extern void zio_init(void);
extern void zio_fini(void);
|
329 |
||
1544 | 330 |
/* |
331 |
* Fault injection |
|
332 |
*/ |
|
333 |
struct zinject_record; |
|
334 |
extern uint32_t zio_injection_enabled; |
|
335 |
extern int zio_inject_fault(char *name, int flags, int *id, |
|
336 |
struct zinject_record *record); |
|
337 |
extern int zio_inject_list_next(int *id, char *name, size_t buflen, |
|
338 |
struct zinject_record *record); |
|
339 |
extern int zio_clear_fault(int id); |
|
340 |
extern int zio_handle_fault_injection(zio_t *zio, int error); |
|
341 |
extern int zio_handle_device_injection(vdev_t *vd, int error); |
|
342 |
||
789 | 343 |
#ifdef __cplusplus |
344 |
} |
|
345 |
#endif |
|
346 |
||
347 |
#endif /* _ZIO_H */ |