|
1 From f1220d61455253b170e81427c9d0357831ca0fac Mon Sep 17 00:00:00 2001 |
|
2 From: Yves Orton <[email protected]> |
|
3 Date: Tue, 12 Feb 2013 10:53:05 +0100 |
|
4 Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH |
|
5 after hsplit() |
|
6 |
|
7 Triggering a hsplit due to long chain length allows an attacker |
|
8 to create a carefully chosen set of keys which can cause the hash |
|
9 to use 2 * (2**32) * sizeof(void *) bytes ram. AKA a DOS via memory |
|
10 exhaustion. Doing so also takes non trivial time. |
|
11 |
|
12 Eliminating this check, and only inspecting chain length after a |
|
13 normal hsplit() (triggered when keys>buckets) prevents the attack |
|
14 entirely, and makes such attacks relatively benign. |
|
15 --- |
|
16 ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++-- |
|
17 hv.c | 35 ++++++++--------------------------- |
|
18 t/op/hash.t | 20 +++++++++++++++++--- |
|
19 3 files changed, 41 insertions(+), 32 deletions(-) |
|
20 |
|
21 diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t |
|
22 index 2cfb4e8..d58f053 100644 |
|
23 --- a/ext/Hash-Util-FieldHash/t/10_hash.t |
|
24 +++ b/ext/Hash-Util-FieldHash/t/10_hash.t |
|
25 @@ -38,15 +38,29 @@ use constant START => "a"; |
|
26 |
|
27 # some initial hash data |
|
28 fieldhash my %h2; |
|
29 -%h2 = map {$_ => 1} 'a'..'cc'; |
|
30 +my $counter= "a"; |
|
31 +$h2{$counter++}++ while $counter ne 'cd'; |
|
32 |
|
33 ok (!Internals::HvREHASH(%h2), |
|
34 "starting with pre-populated non-pathological hash (rehash flag if off)"); |
|
35 |
|
36 my @keys = get_keys(\%h2); |
|
37 +my $buckets= buckets(\%h2); |
|
38 $h2{$_}++ for @keys; |
|
39 +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split |
|
40 ok (Internals::HvREHASH(%h2), |
|
41 - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); |
|
42 + scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split"); |
|
43 + |
|
44 +# returns the number of buckets in a hash |
|
45 +sub buckets { |
|
46 + my $hr = shift; |
|
47 + my $keys_buckets= scalar(%$hr); |
|
48 + if ($keys_buckets=~m!/([0-9]+)\z!) { |
|
49 + return 0+$1; |
|
50 + } else { |
|
51 + return 8; |
|
52 + } |
|
53 +} |
|
54 |
|
55 sub get_keys { |
|
56 my $hr = shift; |
|
57 diff --git a/hv.c b/hv.c |
|
58 index 6b66251..a031703 100644 |
|
59 --- a/hv.c |
|
60 +++ b/hv.c |
|
61 @@ -35,7 +35,8 @@ holds the key and hash value. |
|
62 #define PERL_HASH_INTERNAL_ACCESS |
|
63 #include "perl.h" |
|
64 |
|
65 -#define HV_MAX_LENGTH_BEFORE_SPLIT 14 |
|
66 +#define HV_MAX_LENGTH_BEFORE_REHASH 14 |
|
67 +#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */ |
|
68 |
|
69 static const char S_strtab_error[] |
|
70 = "Cannot modify shared string table in hv_%s"; |
|
71 @@ -798,29 +799,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, |
|
72 if (masked_flags & HVhek_ENABLEHVKFLAGS) |
|
73 HvHASKFLAGS_on(hv); |
|
74 |
|
75 - { |
|
76 - const HE *counter = HeNEXT(entry); |
|
77 - |
|
78 - xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ |
|
79 - if (!counter) { /* initial entry? */ |
|
80 - } else if (xhv->xhv_keys > xhv->xhv_max) { |
|
81 - /* Use only the old HvUSEDKEYS(hv) > HvMAX(hv) condition to limit |
|
82 - bucket splits on a rehashed hash, as we're not going to |
|
83 - split it again, and if someone is lucky (evil) enough to |
|
84 - get all the keys in one list they could exhaust our memory |
|
85 - as we repeatedly double the number of buckets on every |
|
86 - entry. Linear search feels a less worse thing to do. */ |
|
87 - hsplit(hv); |
|
88 - } else if(!HvREHASH(hv)) { |
|
89 - U32 n_links = 1; |
|
90 - |
|
91 - while ((counter = HeNEXT(counter))) |
|
92 - n_links++; |
|
93 - |
|
94 - if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) { |
|
95 - hsplit(hv); |
|
96 - } |
|
97 - } |
|
98 + xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ |
|
99 + if ( SHOULD_DO_HSPLIT(xhv) ) { |
|
100 + hsplit(hv); |
|
101 } |
|
102 |
|
103 if (return_svp) { |
|
104 @@ -1197,7 +1178,7 @@ S_hsplit(pTHX_ HV *hv) |
|
105 |
|
106 |
|
107 /* Pick your policy for "hashing isn't working" here: */ |
|
108 - if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */ |
|
109 + if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */ |
|
110 || HvREHASH(hv)) { |
|
111 return; |
|
112 } |
|
113 @@ -2782,8 +2763,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) |
|
114 |
|
115 xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ |
|
116 if (!next) { /* initial entry? */ |
|
117 - } else if (xhv->xhv_keys > xhv->xhv_max /* HvUSEDKEYS(hv) > HvMAX(hv) */) { |
|
118 - hsplit(PL_strtab); |
|
119 + } else if ( SHOULD_DO_HSPLIT(xhv) ) { |
|
120 + hsplit(PL_strtab); |
|
121 } |
|
122 } |
|
123 |
|
124 diff --git a/t/op/hash.t b/t/op/hash.t |
|
125 index ef757a3..97eb81b 100644 |
|
126 --- a/t/op/hash.t |
|
127 +++ b/t/op/hash.t |
|
128 @@ -39,22 +39,36 @@ use constant THRESHOLD => 14; |
|
129 use constant START => "a"; |
|
130 |
|
131 # some initial hash data |
|
132 -my %h2 = map {$_ => 1} 'a'..'cc'; |
|
133 +my %h2; |
|
134 +my $counter= "a"; |
|
135 +$h2{$counter++}++ while $counter ne 'cd'; |
|
136 |
|
137 ok (!Internals::HvREHASH(%h2), |
|
138 "starting with pre-populated non-pathological hash (rehash flag if off)"); |
|
139 |
|
140 my @keys = get_keys(\%h2); |
|
141 +my $buckets= buckets(\%h2); |
|
142 $h2{$_}++ for @keys; |
|
143 +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split |
|
144 ok (Internals::HvREHASH(%h2), |
|
145 - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); |
|
146 + scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split"); |
|
147 + |
|
148 +# returns the number of buckets in a hash |
|
149 +sub buckets { |
|
150 + my $hr = shift; |
|
151 + my $keys_buckets= scalar(%$hr); |
|
152 + if ($keys_buckets=~m!/([0-9]+)\z!) { |
|
153 + return 0+$1; |
|
154 + } else { |
|
155 + return 8; |
|
156 + } |
|
157 +} |
|
158 |
|
159 sub get_keys { |
|
160 my $hr = shift; |
|
161 |
|
162 # the minimum of bits required to mount the attack on a hash |
|
163 my $min_bits = log(THRESHOLD)/log(2); |
|
164 - |
|
165 # if the hash has already been populated with a significant amount |
|
166 # of entries the number of mask bits can be higher |
|
167 my $keys = scalar keys %$hr; |
|
168 -- |
|
169 1.8.1.3 |
|
170 |
|
171 |