usr/src/uts/common/fs/zfs/lzjb.c
changeset 10922 e2081f502306
parent 3886 3291401d66a6
child 12771 9e4f2f9983b9
--- a/usr/src/uts/common/fs/zfs/lzjb.c	Fri Oct 30 18:47:17 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/lzjb.c	Sun Nov 01 14:14:46 2009 -0800
@@ -20,18 +20,18 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * We keep our own copy of this algorithm for 2 main reasons:
- * 	1. If we didn't, anyone modifying common/os/compress.c would
+ *	1. If we didn't, anyone modifying common/os/compress.c would
  *         directly break our on disk format
- * 	2. Our version of lzjb does not have a number of checks that the
+ *	2. Our version of lzjb does not have a number of checks that the
  *         common/os version needs and uses
+ *	3. We initialize the lempel to ensure deterministic results,
+ *	   so that identical blocks can always be deduplicated.
  * In particular, we are adding the "feature" that compress() can
  * take a destination buffer size and return -1 if the data will not
  * compress to d_len or less.
@@ -43,7 +43,7 @@
 #define	MATCH_MIN	3
 #define	MATCH_MAX	((1 << MATCH_BITS) + (MATCH_MIN - 1))
 #define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)
-#define	LEMPEL_SIZE	256
+#define	LEMPEL_SIZE	1024
 
 /*ARGSUSED*/
 size_t
@@ -53,20 +53,14 @@
 	uchar_t *dst = d_start;
 	uchar_t *cpy, *copymap;
 	int copymask = 1 << (NBBY - 1);
-	int mlen, offset;
+	int mlen, offset, hash;
 	uint16_t *hp;
-	uint16_t lempel[LEMPEL_SIZE];	/* uninitialized; see above */
+	uint16_t lempel[LEMPEL_SIZE] = { 0 };
 
 	while (src < (uchar_t *)s_start + s_len) {
 		if ((copymask <<= 1) == (1 << NBBY)) {
-			if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) {
-				if (d_len != s_len)
-					return (s_len);
-				mlen = s_len;
-				for (src = s_start, dst = d_start; mlen; mlen--)
-					*dst++ = *src++;
+			if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY)
 				return (s_len);
-			}
 			copymask = 1;
 			copymap = dst;
 			*dst++ = 0;
@@ -75,8 +69,10 @@
 			*dst++ = *src++;
 			continue;
 		}
-		hp = &lempel[((src[0] + 13) ^ (src[1] - 13) ^ src[2]) &
-		    (LEMPEL_SIZE - 1)];
+		hash = (src[0] << 16) + (src[1] << 8) + src[2];
+		hash += hash >> 9;
+		hash += hash >> 5;
+		hp = &lempel[hash & (LEMPEL_SIZE - 1)];
 		offset = (intptr_t)(src - *hp) & OFFSET_MASK;
 		*hp = (uint16_t)(uintptr_t)src;
 		cpy = src - offset;