1499N/AFrom f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001
1499N/AFrom: Yves Orton <demerphq@gmail.com>
1499N/ADate: Tue, 12 Feb 2013 10:53:05 +0100
1499N/ASubject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
1499N/A after hsplit()
1499N/A
1499N/ATriggering an hsplit() due to long chain length allows an attacker
1499N/Ato create a carefully chosen set of keys which can cause the hash
1499N/Ato use 2 * (2**32) * sizeof(void *) bytes of RAM, i.e. a DoS via
1499N/Amemory exhaustion. Doing so also takes non-trivial time.
1499N/A
1499N/AEliminating this check, and only inspecting chain length after a
1499N/Anormal hsplit() (triggered when keys > buckets), prevents the memory
1499N/Aexhaustion entirely and makes such attacks relatively benign.
1499N/A
1499N/A(cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
1499N/A---
1499N/A ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
1499N/A hv.c | 26 ++++++--------------------
1499N/A t/op/hash.t | 20 +++++++++++++++++---
1499N/A 3 files changed, 39 insertions(+), 25 deletions(-)
1499N/A
1499N/Adiff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
1499N/Aindex 2cfb4e8..d58f053 100644
1499N/A--- a/ext/Hash-Util-FieldHash/t/10_hash.t
1499N/A+++ b/ext/Hash-Util-FieldHash/t/10_hash.t
1499N/A@@ -38,15 +38,29 @@ use constant START => "a";
1499N/A
1499N/A # some initial hash data
1499N/A fieldhash my %h2;
1499N/A-%h2 = map {$_ => 1} 'a'..'cc';
1499N/A+my $counter= "a";
1499N/A+$h2{$counter++}++ while $counter ne 'cd';
1499N/A
1499N/A ok (!Internals::HvREHASH(%h2),
1499N/A "starting with pre-populated non-pathological hash (rehash flag if off)");
1499N/A
1499N/A my @keys = get_keys(\%h2);
1499N/A+my $buckets= buckets(\%h2);
1499N/A $h2{$_}++ for @keys;
1499N/A+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
1499N/A ok (Internals::HvREHASH(%h2),
1499N/A- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
1499N/A+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
1499N/A+
1499N/A+# returns the number of buckets in a hash
1499N/A+sub buckets {
1499N/A+ my $hr = shift;
1499N/A+ my $keys_buckets= scalar(%$hr);
1499N/A+ if ($keys_buckets=~m!/([0-9]+)\z!) {
1499N/A+ return 0+$1;
1499N/A+ } else {
1499N/A+ return 8;
1499N/A+ }
1499N/A+}
1499N/A
1499N/A sub get_keys {
1499N/A my $hr = shift;
1499N/Adiff --git a/hv.c b/hv.c
1499N/Aindex 89c6456..8659678 100644
1499N/A--- a/hv.c
1499N/A+++ b/hv.c
1499N/A@@ -35,7 +35,8 @@ holds the key and hash value.
1499N/A #define PERL_HASH_INTERNAL_ACCESS
1499N/A #include "perl.h"
1499N/A
1499N/A-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
1499N/A+#define HV_MAX_LENGTH_BEFORE_REHASH 14
1499N/A+#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
1499N/A
1499N/A static const char S_strtab_error[]
1499N/A = "Cannot modify shared string table in hv_%s";
1499N/A@@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
1499N/A xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
1499N/A if (!counter) { /* initial entry? */
1499N/A xhv->xhv_fill++; /* HvFILL(hv)++ */
1499N/A- } else if (xhv->xhv_keys > (IV)xhv->xhv_max) {
1499N/A+ } else if ( SHOULD_DO_HSPLIT(xhv) ) {
1499N/A hsplit(hv);
1499N/A- } else if(!HvREHASH(hv)) {
1499N/A- U32 n_links = 1;
1499N/A-
1499N/A- while ((counter = HeNEXT(counter)))
1499N/A- n_links++;
1499N/A-
1499N/A- if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
1499N/A- /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
1499N/A- bucket splits on a rehashed hash, as we're not going to
1499N/A- split it again, and if someone is lucky (evil) enough to
1499N/A- get all the keys in one list they could exhaust our memory
1499N/A- as we repeatedly double the number of buckets on every
1499N/A- entry. Linear search feels a less worse thing to do. */
1499N/A- hsplit(hv);
1499N/A- }
1499N/A }
1499N/A }
1499N/A
1499N/A@@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv)
1499N/A
1499N/A
1499N/A /* Pick your policy for "hashing isn't working" here: */
1499N/A- if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
1499N/A+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */
1499N/A || HvREHASH(hv)) {
1499N/A return;
1499N/A }
1499N/A@@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
1499N/A xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
1499N/A if (!next) { /* initial entry? */
1499N/A xhv->xhv_fill++; /* HvFILL(hv)++ */
1499N/A- } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
1499N/A- hsplit(PL_strtab);
1499N/A+ } else if ( SHOULD_DO_HSPLIT(xhv) ) {
1499N/A+ hsplit(PL_strtab);
1499N/A }
1499N/A }
1499N/A
1499N/Adiff --git a/t/op/hash.t b/t/op/hash.t
1499N/Aindex 9bde518..45eb782 100644
1499N/A--- a/t/op/hash.t
1499N/A+++ b/t/op/hash.t
1499N/A@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
1499N/A use constant START => "a";
1499N/A
1499N/A # some initial hash data
1499N/A-my %h2 = map {$_ => 1} 'a'..'cc';
1499N/A+my %h2;
1499N/A+my $counter= "a";
1499N/A+$h2{$counter++}++ while $counter ne 'cd';
1499N/A
1499N/A ok (!Internals::HvREHASH(%h2),
1499N/A "starting with pre-populated non-pathological hash (rehash flag if off)");
1499N/A
1499N/A my @keys = get_keys(\%h2);
1499N/A+my $buckets= buckets(\%h2);
1499N/A $h2{$_}++ for @keys;
1499N/A+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
1499N/A ok (Internals::HvREHASH(%h2),
1499N/A- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
1499N/A+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
1499N/A+
1499N/A+# returns the number of buckets in a hash
1499N/A+sub buckets {
1499N/A+ my $hr = shift;
1499N/A+ my $keys_buckets= scalar(%$hr);
1499N/A+ if ($keys_buckets=~m!/([0-9]+)\z!) {
1499N/A+ return 0+$1;
1499N/A+ } else {
1499N/A+ return 8;
1499N/A+ }
1499N/A+}
1499N/A
1499N/A sub get_keys {
1499N/A my $hr = shift;
1499N/A
1499N/A # the minimum of bits required to mount the attack on a hash
1499N/A my $min_bits = log(THRESHOLD)/log(2);
1499N/A-
1499N/A # if the hash has already been populated with a significant amount
1499N/A # of entries the number of mask bits can be higher
1499N/A my $keys = scalar keys %$hr;
1499N/A--
1499N/A1.8.1.3
1499N/A
1499N/A