components/perl/perl516/patches/rehash-5.16.2.patch
changeset 1206 8f71b436e7f7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/perl/perl516/patches/rehash-5.16.2.patch	Mon Mar 11 15:41:09 2013 -0700
@@ -0,0 +1,171 @@
+From f1220d61455253b170e81427c9d0357831ca0fac Mon Sep 17 00:00:00 2001
+From: Yves Orton <[email protected]>
+Date: Tue, 12 Feb 2013 10:53:05 +0100
+Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
+ after hsplit()
+
+Triggering a hsplit due to long chain length allows an attacker
+to create a carefully chosen set of keys which can cause the hash
+to use 2 * (2**32) * sizeof(void *) bytes ram. AKA a DOS via memory
+exhaustion. Doing so also takes non trivial time.
+
+Eliminating this check, and only inspecting chain length after a
+normal hsplit() (triggered when keys>buckets) prevents the attack
+entirely, and makes such attacks relatively benign.
+---
+ ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
+ hv.c                                | 35 ++++++++---------------------------
+ t/op/hash.t                         | 20 +++++++++++++++++---
+ 3 files changed, 41 insertions(+), 32 deletions(-)
+
+diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
+index 2cfb4e8..d58f053 100644
+--- a/ext/Hash-Util-FieldHash/t/10_hash.t
++++ b/ext/Hash-Util-FieldHash/t/10_hash.t
+@@ -38,15 +38,29 @@ use constant START     => "a";
+ 
+ # some initial hash data
+ fieldhash my %h2;
+-%h2 = map {$_ => 1} 'a'..'cc';
++my $counter= "a";
++$h2{$counter++}++ while $counter ne 'cd';
+ 
+ ok (!Internals::HvREHASH(%h2), 
+     "starting with pre-populated non-pathological hash (rehash flag if off)");
+ 
+ my @keys = get_keys(\%h2);
++my $buckets= buckets(\%h2);
+ $h2{$_}++ for @keys;
++$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
+ ok (Internals::HvREHASH(%h2), 
+-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
++    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
++
++# returns the number of buckets in a hash
++sub buckets {
++    my $hr = shift;
++    my $keys_buckets= scalar(%$hr);
++    if ($keys_buckets=~m!/([0-9]+)\z!) {
++        return 0+$1;
++    } else {
++        return 8;
++    }
++}
+ 
+ sub get_keys {
+     my $hr = shift;
+diff --git a/hv.c b/hv.c
+index 6b66251..a031703 100644
+--- a/hv.c
++++ b/hv.c
+@@ -35,7 +35,8 @@ holds the key and hash value.
+ #define PERL_HASH_INTERNAL_ACCESS
+ #include "perl.h"
+ 
+-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
++#define HV_MAX_LENGTH_BEFORE_REHASH 14
++#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
+ 
+ static const char S_strtab_error[]
+     = "Cannot modify shared string table in hv_%s";
+@@ -798,29 +799,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
+     if (masked_flags & HVhek_ENABLEHVKFLAGS)
+ 	HvHASKFLAGS_on(hv);
+ 
+-    {
+-	const HE *counter = HeNEXT(entry);
+-
+-	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
+-	if (!counter) {				/* initial entry? */
+-	} else if (xhv->xhv_keys > xhv->xhv_max) {
+-		/* Use only the old HvUSEDKEYS(hv) > HvMAX(hv) condition to limit
+-		   bucket splits on a rehashed hash, as we're not going to
+-		   split it again, and if someone is lucky (evil) enough to
+-		   get all the keys in one list they could exhaust our memory
+-		   as we repeatedly double the number of buckets on every
+-		   entry. Linear search feels a less worse thing to do.  */
+-	    hsplit(hv);
+-	} else if(!HvREHASH(hv)) {
+-	    U32 n_links = 1;
+-
+-	    while ((counter = HeNEXT(counter)))
+-		n_links++;
+-
+-	    if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
+-		hsplit(hv);
+-	    }
+-	}
++    xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
++    if ( SHOULD_DO_HSPLIT(xhv) ) {
++        hsplit(hv);
+     }
+ 
+     if (return_svp) {
+@@ -1197,7 +1178,7 @@ S_hsplit(pTHX_ HV *hv)
+ 
+ 
+     /* Pick your policy for "hashing isn't working" here:  */
+-    if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked?  */
++    if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked?  */
+ 	|| HvREHASH(hv)) {
+ 	return;
+     }
+@@ -2782,8 +2763,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
+ 
+ 	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
+ 	if (!next) {			/* initial entry? */
+-	} else if (xhv->xhv_keys > xhv->xhv_max /* HvUSEDKEYS(hv) > HvMAX(hv) */) {
+-		hsplit(PL_strtab);
++	} else if ( SHOULD_DO_HSPLIT(xhv) ) {
++            hsplit(PL_strtab);
+ 	}
+     }
+ 
+diff --git a/t/op/hash.t b/t/op/hash.t
+index ef757a3..97eb81b 100644
+--- a/t/op/hash.t
++++ b/t/op/hash.t
+@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
+ use constant START     => "a";
+ 
+ # some initial hash data
+-my %h2 = map {$_ => 1} 'a'..'cc';
++my %h2;
++my $counter= "a";
++$h2{$counter++}++ while $counter ne 'cd';
+ 
+ ok (!Internals::HvREHASH(%h2), 
+     "starting with pre-populated non-pathological hash (rehash flag if off)");
+ 
+ my @keys = get_keys(\%h2);
++my $buckets= buckets(\%h2);
+ $h2{$_}++ for @keys;
++$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
+ ok (Internals::HvREHASH(%h2), 
+-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
++    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
++
++# returns the number of buckets in a hash
++sub buckets {
++    my $hr = shift;
++    my $keys_buckets= scalar(%$hr);
++    if ($keys_buckets=~m!/([0-9]+)\z!) {
++        return 0+$1;
++    } else {
++        return 8;
++    }
++}
+ 
+ sub get_keys {
+     my $hr = shift;
+ 
+     # the minimum of bits required to mount the attack on a hash
+     my $min_bits = log(THRESHOLD)/log(2);
+-
+     # if the hash has already been populated with a significant amount
+     # of entries the number of mask bits can be higher
+     my $keys = scalar keys %$hr;
+-- 
+1.8.1.3
+
+