usr/src/uts/common/fs/zfs/zap_leaf.c
changeset 5331 3047ad28a67b
parent 3052 7a3625b7393b
child 5498 334b476844ca
equal deleted inserted replaced
5330:71aba7712438 5331:3047ad28a67b
    17  * information: Portions Copyright [yyyy] [name of copyright owner]
    17  * information: Portions Copyright [yyyy] [name of copyright owner]
    18  *
    18  *
    19  * CDDL HEADER END
    19  * CDDL HEADER END
    20  */
    20  */
    21 /*
    21 /*
    22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
    22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
    23  * Use is subject to license terms.
    23  * Use is subject to license terms.
    24  */
    24  */
    25 
    25 
    26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
    26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
    27 
    27 
    35 #include <sys/zap.h>
    35 #include <sys/zap.h>
    36 #include <sys/zap_impl.h>
    36 #include <sys/zap_impl.h>
    37 #include <sys/zap_leaf.h>
    37 #include <sys/zap_leaf.h>
    38 #include <sys/spa.h>
    38 #include <sys/spa.h>
    39 #include <sys/dmu.h>
    39 #include <sys/dmu.h>
       
    40 
       
    41 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
    40 
    42 
    41 #define	CHAIN_END 0xffff /* end of the chunk chain */
    43 #define	CHAIN_END 0xffff /* end of the chunk chain */
    42 
    44 
    43 /* half the (current) minimum block size */
    45 /* half the (current) minimum block size */
    44 #define	MAX_ARRAY_BYTES (8<<10)
    46 #define	MAX_ARRAY_BYTES (8<<10)
   148 		}
   150 		}
   149 	}
   151 	}
   150 }
   152 }
   151 
   153 
   152 void
   154 void
   153 zap_leaf_init(zap_leaf_t *l)
   155 zap_leaf_init(zap_leaf_t *l, int version)
   154 {
   156 {
   155 	int i;
   157 	int i;
   156 
   158 
   157 	l->l_bs = highbit(l->l_dbuf->db_size)-1;
   159 	l->l_bs = highbit(l->l_dbuf->db_size)-1;
   158 	zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
   160 	zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
   163 	}
   165 	}
   164 	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
   166 	ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
   165 	l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
   167 	l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
   166 	l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
   168 	l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
   167 	l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
   169 	l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
       
   170 	if (version >= SPA_VERSION_NORMALIZATION)
       
   171 		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
   168 }
   172 }
   169 
   173 
   170 /*
   174 /*
   171  * Routines which manipulate leaf chunks (l_chunk[]).
   175  * Routines which manipulate leaf chunks (l_chunk[]).
   172  */
   176  */
   325 }
   329 }
   326 
   330 
   327 /*
   331 /*
   328  * Only to be used on 8-bit arrays.
   332  * Only to be used on 8-bit arrays.
   329  * array_len is actual len in bytes (not encoded le_value_length).
   333  * array_len is actual len in bytes (not encoded le_value_length).
   330  * buf is null-terminated.
   334  * namenorm is null-terminated.
   331  */
   335  */
   332 static int
   336 static boolean_t
   333 zap_leaf_array_equal(zap_leaf_t *l, int chunk,
   337 zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
   334     int array_len, const char *buf)
       
   335 {
   338 {
   336 	int bseen = 0;
   339 	int bseen = 0;
   337 
   340 
       
   341 	if (zn->zn_matchtype == MT_FIRST) {
       
   342 		char *thisname = kmem_alloc(array_len, KM_SLEEP);
       
   343 		boolean_t match;
       
   344 
       
   345 		zap_leaf_array_read(l, chunk, 1, array_len, 1,
       
   346 		    array_len, thisname);
       
   347 		match = zap_match(zn, thisname);
       
   348 		kmem_free(thisname, array_len);
       
   349 		return (match);
       
   350 	}
       
   351 
       
   352 	/* Fast path for exact matching */
   338 	while (bseen < array_len) {
   353 	while (bseen < array_len) {
   339 		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
   354 		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
   340 		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
   355 		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
   341 		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
   356 		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
   342 		if (bcmp(la->la_array, buf + bseen, toread))
   357 		if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
   343 			break;
   358 			break;
   344 		chunk = la->la_next;
   359 		chunk = la->la_next;
   345 		bseen += toread;
   360 		bseen += toread;
   346 	}
   361 	}
   347 	return (bseen == array_len);
   362 	return (bseen == array_len);
   350 /*
   365 /*
   351  * Routines which manipulate leaf entries.
   366  * Routines which manipulate leaf entries.
   352  */
   367  */
   353 
   368 
   354 int
   369 int
   355 zap_leaf_lookup(zap_leaf_t *l,
   370 zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
   356     const char *name, uint64_t h, zap_entry_handle_t *zeh)
       
   357 {
   371 {
   358 	uint16_t *chunkp;
   372 	uint16_t *chunkp;
   359 	struct zap_leaf_entry *le;
   373 	struct zap_leaf_entry *le;
   360 
   374 
   361 	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
   375 	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
   362 
   376 
   363 	for (chunkp = LEAF_HASH_ENTPTR(l, h);
   377 again:
       
   378 	for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
   364 	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
   379 	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
   365 		uint16_t chunk = *chunkp;
   380 		uint16_t chunk = *chunkp;
   366 		le = ZAP_LEAF_ENTRY(l, chunk);
   381 		le = ZAP_LEAF_ENTRY(l, chunk);
   367 
   382 
   368 		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
   383 		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
   369 		ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
   384 		ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
   370 
   385 
   371 		if (le->le_hash != h)
   386 		if (le->le_hash != zn->zn_hash)
   372 			continue;
   387 			continue;
   373 
   388 
   374 		if (zap_leaf_array_equal(l, le->le_name_chunk,
   389 		/*
   375 		    le->le_name_length, name)) {
   390 		 * NB: the entry chain is always sorted by cd on
       
   391 		 * normalized zap objects, so this will find the
       
   392 		 * lowest-cd match for MT_FIRST.
       
   393 		 */
       
   394 		ASSERT(zn->zn_matchtype == MT_EXACT ||
       
   395 		    (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
       
   396 		if (zap_leaf_array_match(l, zn, le->le_name_chunk,
       
   397 		    le->le_name_length)) {
   376 			zeh->zeh_num_integers = le->le_value_length;
   398 			zeh->zeh_num_integers = le->le_value_length;
   377 			zeh->zeh_integer_size = le->le_int_size;
   399 			zeh->zeh_integer_size = le->le_int_size;
   378 			zeh->zeh_cd = le->le_cd;
   400 			zeh->zeh_cd = le->le_cd;
   379 			zeh->zeh_hash = le->le_hash;
   401 			zeh->zeh_hash = le->le_hash;
   380 			zeh->zeh_chunkp = chunkp;
   402 			zeh->zeh_chunkp = chunkp;
   381 			zeh->zeh_leaf = l;
   403 			zeh->zeh_leaf = l;
   382 			return (0);
   404 			return (0);
   383 		}
   405 		}
       
   406 	}
       
   407 
       
   408 	/*
       
   409 	 * NB: we could of course do this in one pass, but that would be
       
   410 	 * a pain.  We'll see if MT_BEST is even used much.
       
   411 	 */
       
   412 	if (zn->zn_matchtype == MT_BEST) {
       
   413 		zn->zn_matchtype = MT_FIRST;
       
   414 		goto again;
   384 	}
   415 	}
   385 
   416 
   386 	return (ENOENT);
   417 	return (ENOENT);
   387 }
   418 }
   388 
   419 
   537 	    ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
   568 	    ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
   538 	if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
   569 	if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
   539 		return (E2BIG);
   570 		return (E2BIG);
   540 
   571 
   541 	if (cd == ZAP_MAXCD) {
   572 	if (cd == ZAP_MAXCD) {
   542 		for (cd = 0; cd < ZAP_MAXCD; cd++) {
   573 		/* find the lowest unused cd */
       
   574 		if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
       
   575 			cd = 0;
       
   576 
   543 			for (chunk = *LEAF_HASH_ENTPTR(l, h);
   577 			for (chunk = *LEAF_HASH_ENTPTR(l, h);
   544 			    chunk != CHAIN_END; chunk = le->le_next) {
   578 			    chunk != CHAIN_END; chunk = le->le_next) {
   545 				le = ZAP_LEAF_ENTRY(l, chunk);
   579 				le = ZAP_LEAF_ENTRY(l, chunk);
   546 				if (le->le_hash == h &&
   580 				if (le->le_cd > cd)
   547 				    le->le_cd == cd) {
       
   548 					break;
   581 					break;
       
   582 				if (le->le_hash == h) {
       
   583 					ASSERT3U(cd, ==, le->le_cd);
       
   584 					cd++;
   549 				}
   585 				}
   550 			}
   586 			}
   551 			/* If this cd is not in use, we are good. */
   587 		} else {
   552 			if (chunk == CHAIN_END)
   588 			/* old unsorted format; do it the O(n^2) way */
   553 				break;
   589 			for (cd = 0; cd < ZAP_MAXCD; cd++) {
   554 		}
   590 				for (chunk = *LEAF_HASH_ENTPTR(l, h);
   555 		/* If we tried all the cd's, we lose. */
   591 				    chunk != CHAIN_END; chunk = le->le_next) {
   556 		if (cd == ZAP_MAXCD)
   592 					le = ZAP_LEAF_ENTRY(l, chunk);
   557 			return (ENOSPC);
   593 					if (le->le_hash == h &&
       
   594 					    le->le_cd == cd) {
       
   595 						break;
       
   596 					}
       
   597 				}
       
   598 				/* If this cd is not in use, we are good. */
       
   599 				if (chunk == CHAIN_END)
       
   600 					break;
       
   601 			}
       
   602 		}
       
   603 		/*
       
   604 		 * we would run out of space in a block before we could
       
   605 		 * have ZAP_MAXCD entries
       
   606 		 */
       
   607 		ASSERT3U(cd, <, ZAP_MAXCD);
   558 	}
   608 	}
   559 
   609 
   560 	if (l->l_phys->l_hdr.lh_nfree < numchunks)
   610 	if (l->l_phys->l_hdr.lh_nfree < numchunks)
   561 		return (EAGAIN);
   611 		return (EAGAIN);
   562 
   612 
   572 	le->le_int_size = integer_size;
   622 	le->le_int_size = integer_size;
   573 	le->le_hash = h;
   623 	le->le_hash = h;
   574 	le->le_cd = cd;
   624 	le->le_cd = cd;
   575 
   625 
   576 	/* link it into the hash chain */
   626 	/* link it into the hash chain */
   577 	chunkp = LEAF_HASH_ENTPTR(l, h);
   627 	/* XXX if we did the search above, we could just use that */
   578 	le->le_next = *chunkp;
   628 	chunkp = zap_leaf_rehash_entry(l, chunk);
   579 	*chunkp = chunk;
       
   580 
   629 
   581 	l->l_phys->l_hdr.lh_nentries++;
   630 	l->l_phys->l_hdr.lh_nentries++;
   582 
   631 
   583 	zeh->zeh_leaf = l;
   632 	zeh->zeh_leaf = l;
   584 	zeh->zeh_num_integers = num_integers;
   633 	zeh->zeh_num_integers = num_integers;
   589 
   638 
   590 	return (0);
   639 	return (0);
   591 }
   640 }
   592 
   641 
   593 /*
   642 /*
       
   643  * Determine if there is another entry with the same normalized form.
       
   644  * For performance purposes, either zn or name must be provided (the
       
   645  * other can be NULL).  Note, there usually won't be any hash
       
   646  * conflicts, in which case we don't need the concatenated/normalized
       
   647  * form of the name.  But all callers have one of these on hand anyway,
       
   648  * so might as well take advantage.  A cleaner but slower interface
       
   649  * would accept neither argument, and compute the normalized name as
       
   650  * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
       
   651  */
       
   652 boolean_t
       
   653 zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
       
   654     const char *name, zap_t *zap)
       
   655 {
       
   656 	uint64_t chunk;
       
   657 	struct zap_leaf_entry *le;
       
   658 	boolean_t allocdzn = B_FALSE;
       
   659 
       
   660 	if (zap->zap_normflags == 0)
       
   661 		return (B_FALSE);
       
   662 
       
   663 	for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
       
   664 	    chunk != CHAIN_END; chunk = le->le_next) {
       
   665 		le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
       
   666 		if (le->le_hash != zeh->zeh_hash)
       
   667 			continue;
       
   668 		if (le->le_cd == zeh->zeh_cd)
       
   669 			continue;
       
   670 
       
   671 		if (zn == NULL) {
       
   672 			zn = zap_name_alloc(zap, name, MT_FIRST);
       
   673 			allocdzn = B_TRUE;
       
   674 		}
       
   675 		if (zap_leaf_array_match(zeh->zeh_leaf, zn,
       
   676 		    le->le_name_chunk, le->le_name_length)) {
       
   677 			if (allocdzn)
       
   678 				zap_name_free(zn);
       
   679 			return (B_TRUE);
       
   680 		}
       
   681 	}
       
   682 	if (allocdzn)
       
   683 		zap_name_free(zn);
       
   684 	return (B_FALSE);
       
   685 }
       
   686 
       
   687 /*
   594  * Routines for transferring entries between leafs.
   688  * Routines for transferring entries between leafs.
   595  */
   689  */
   596 
   690 
   597 static void
   691 static uint16_t *
   598 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
   692 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
   599 {
   693 {
   600 	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
   694 	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
   601 	uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash);
   695 	struct zap_leaf_entry *le2;
   602 	le->le_next = *ptr;
   696 	uint16_t *chunkp;
   603 	*ptr = entry;
   697 
       
   698 	/*
       
   699 	 * keep the entry chain sorted by cd
       
   700 	 * NB: this will not cause problems for unsorted leafs, though
       
   701 	 * it is unnecessary there.
       
   702 	 */
       
   703 	for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
       
   704 	    *chunkp != CHAIN_END; chunkp = &le2->le_next) {
       
   705 		le2 = ZAP_LEAF_ENTRY(l, *chunkp);
       
   706 		if (le2->le_cd > le->le_cd)
       
   707 			break;
       
   708 	}
       
   709 
       
   710 	le->le_next = *chunkp;
       
   711 	*chunkp = entry;
       
   712 	return (chunkp);
   604 }
   713 }
   605 
   714 
   606 static uint16_t
   715 static uint16_t
   607 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
   716 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
   608 {
   717 {
   642 
   751 
   643 	chunk = zap_leaf_chunk_alloc(nl);
   752 	chunk = zap_leaf_chunk_alloc(nl);
   644 	nle = ZAP_LEAF_ENTRY(nl, chunk);
   753 	nle = ZAP_LEAF_ENTRY(nl, chunk);
   645 	*nle = *le; /* structure assignment */
   754 	*nle = *le; /* structure assignment */
   646 
   755 
   647 	zap_leaf_rehash_entry(nl, chunk);
   756 	(void) zap_leaf_rehash_entry(nl, chunk);
   648 
   757 
   649 	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
   758 	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
   650 	nle->le_value_chunk =
   759 	nle->le_value_chunk =
   651 	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);
   760 	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);
   652 
   761 
   658 
   767 
   659 /*
   768 /*
   660  * Transfer the entries whose hash prefix ends in 1 to the new leaf.
   769  * Transfer the entries whose hash prefix ends in 1 to the new leaf.
   661  */
   770  */
   662 void
   771 void
   663 zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl)
   772 zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, int version)
   664 {
   773 {
   665 	int i;
   774 	int i;
   666 	int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
   775 	int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
   667 
   776 
   668 	/* set new prefix and prefix_len */
   777 	/* set new prefix and prefix_len */
   671 	nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
   780 	nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
   672 	nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
   781 	nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
   673 
   782 
   674 	/* break existing hash chains */
   783 	/* break existing hash chains */
   675 	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
   784 	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
       
   785 
       
   786 	if (version >= SPA_VERSION_NORMALIZATION)
       
   787 		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
   676 
   788 
   677 	/*
   789 	/*
   678 	 * Transfer entries whose hash bit 'bit' is set to nl; rehash
   790 	 * Transfer entries whose hash bit 'bit' is set to nl; rehash
   679 	 * the remaining entries
   791 	 * the remaining entries
   680 	 *
   792 	 *
   689 			continue;
   801 			continue;
   690 
   802 
   691 		if (le->le_hash & (1ULL << bit))
   803 		if (le->le_hash & (1ULL << bit))
   692 			zap_leaf_transfer_entry(l, i, nl);
   804 			zap_leaf_transfer_entry(l, i, nl);
   693 		else
   805 		else
   694 			zap_leaf_rehash_entry(l, i);
   806 			(void) zap_leaf_rehash_entry(l, i);
   695 	}
   807 	}
   696 }
   808 }
   697 
   809 
   698 void
   810 void
   699 zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
   811 zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)