From 1c8d68fbe1c5d7354172e68edbbedadf99c6d616 Mon Sep 17 00:00:00 2001 From: Dylan Drop Date: Mon, 13 Feb 2017 21:44:51 -0500 Subject: [PATCH 1/2] Hash should not default to 11 if it is specified --- spec/std/hash_spec.cr | 6 ++++++ src/hash.cr | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr index 4097a4aefecb..b01c7ebb0264 100644 --- a/spec/std/hash_spec.cr +++ b/spec/std/hash_spec.cr @@ -323,6 +323,12 @@ describe "Hash" do h1["bar"].should eq([2]) end + it "raises on initialize if set to a non-postive value" do + expect_raises ArgumentError, "Hash capacity must be positive" do + Hash(Int32, Int32).new(initial_capacity: -1) { 1234 } + end + end + it "initializes with default value" do h = Hash(Int32, Int32).new(10) h[0].should eq(10) diff --git a/src/hash.cr b/src/hash.cr index 6d7fe7567b52..3ad1509cf6c4 100644 --- a/src/hash.cr +++ b/src/hash.cr @@ -13,8 +13,8 @@ class Hash(K, V) def initialize(block : (Hash(K, V), K -> V)? 
= nil, initial_capacity = nil) initial_capacity ||= 11 - initial_capacity = 11 if initial_capacity < 11 initial_capacity = initial_capacity.to_i + raise ArgumentError.new("Hash capacity must be positive") if initial_capacity <= 0 @buckets = Pointer(Entry(K, V)?).malloc(initial_capacity) @buckets_size = initial_capacity @size = 0 From cd48b2bb87f0b41cc9a74b9b6252fcf893aaddc5 Mon Sep 17 00:00:00 2001 From: Dylan Drop Date: Sun, 26 Feb 2017 15:04:14 -0500 Subject: [PATCH 2/2] Create bucket size from next largest prime of passed capacity --- spec/std/hash_spec.cr | 15 +++++--- src/hash.cr | 80 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/spec/std/hash_spec.cr b/spec/std/hash_spec.cr index b01c7ebb0264..fc1d90f394f2 100644 --- a/spec/std/hash_spec.cr +++ b/spec/std/hash_spec.cr @@ -324,7 +324,7 @@ describe "Hash" do end it "raises on initialize if set to a non-postive value" do - expect_raises ArgumentError, "Hash capacity must be positive" do + expect_raises ArgumentError, "Hash capacity cannot be negative" do Hash(Int32, Int32).new(initial_capacity: -1) { 1234 } end end @@ -811,20 +811,25 @@ describe "Hash" do items.uniq.size end - it "creates with initial capacity" do + it "sets the initial capacity when not passed" do + hash = Hash(Int32, Int32).new + hash.@buckets_size.should eq(Hash::PRIMES[0]) + end + + it "creates the initial capacity from next largest prime of passed capacity" do hash = Hash(Int32, Int32).new(initial_capacity: 1234) - hash.@buckets_size.should eq(1234) + hash.@buckets_size.should eq(2053) end it "creates with initial capacity and default value" do hash = Hash(Int32, Int32).new(default_value: 3, initial_capacity: 1234) hash[1].should eq(3) - hash.@buckets_size.should eq(1234) + hash.@buckets_size.should eq(2053) end it "creates with initial capacity and block" do hash = Hash(Int32, Int32).new(initial_capacity: 1234) { |h, k| h[k] = 3 } hash[1].should eq(3) - hash.@buckets_size.should 
eq(1234) + hash.@buckets_size.should eq(2053) end end diff --git a/src/hash.cr b/src/hash.cr index 3ad1509cf6c4..a0661a7739e0 100644 --- a/src/hash.cr +++ b/src/hash.cr @@ -11,20 +11,50 @@ class Hash(K, V) @last : Entry(K, V)? @block : (self, K -> V)? + # Instantiates a new Hash. The initial capacity of the + # buckets is determined by finding the nearest prime of the form 2^n + a + # above the number you passed in. In the following example, the hash is + # created with 2053 buckets. + # + # ``` + # hash = Hash(Int32, Int32).new(default_value: 3, initial_capacity: 1234) + # hash[1] # => 3 + # ``` def initialize(block : (Hash(K, V), K -> V)? = nil, initial_capacity = nil) - initial_capacity ||= 11 - initial_capacity = initial_capacity.to_i - raise ArgumentError.new("Hash capacity must be positive") if initial_capacity <= 0 + initial_capacity = if initial_capacity.nil? + PRIMES[0] + else + raise ArgumentError.new("Hash capacity cannot be negative") if initial_capacity < 0 + next_largest_prime(initial_capacity) + end @buckets = Pointer(Entry(K, V)?).malloc(initial_capacity) @buckets_size = initial_capacity @size = 0 @block = block end + # Instantiates a new Hash with an initial capacity. The initial capacity of + # the buckets is determined by finding the nearest prime of the form 2^n + a + # above the number you passed in. + # + # A block can be used to specify the values of the hash. + # + # ``` + # hash = Hash(Int32, Int32).new(initial_capacity: 1234) { |h, k| h[k] = 3 } + # hash[1] # => 3 + # ``` def self.new(initial_capacity = nil, &block : (Hash(K, V), K -> V)) new block, initial_capacity: initial_capacity end + # Instantiates a new Hash with a default value. The initial capacity of the + # buckets is determined by finding the nearest prime of the form 2^n + a + # above the number you passed in. 
+ # + # ``` + # hash = Hash(Int32, Int32).new(default_value: 3, initial_capacity: 1234) + # hash[1] # => 3 + # ``` def self.new(default_value : V, initial_capacity = nil) new(initial_capacity: initial_capacity) { default_value } end @@ -834,6 +864,50 @@ class Hash(K, V) find_entry_in_bucket entry, key end + # Table of prime numbers 2^n+a, 3<=n<=30. + PRIMES = Int32.static_array( + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85 + ) + MINSIZE = 8 + + private def next_largest_prime(passed_size) + new_size = MINSIZE + PRIMES.each do |prime| + if new_size > passed_size + return prime + end + new_size <<= 1 + end + raise "Requested initial capacity for hash was too high" + end + private def insert_in_bucket(index, key, value) entry = @buckets[index] if entry