From d863a4a2ef139920b5f4e71247b2e81e783b46c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Bori?= Date: Sun, 1 Sep 2024 01:40:08 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20(method):=20Implemented=20open=20ad?= =?UTF-8?q?dressing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + src/CMakeLists.txt | 4 +- src/common/CMakeLists.txt | 1 + src/common/hash.h | 4 +- src/common/key.h | 4 +- src/common/table.h | 7 ++ src/common/types.h | 4 +- src/open_addressing/CMakeLists.txt | 15 +++ src/open_addressing/entry.h | 17 +++ src/open_addressing/table.c | 184 +++++++++++++++++++++++++++++ src/open_addressing/table.h | 34 ++++++ src/separate_chaining/entry.c | 17 +-- src/separate_chaining/entry.h | 1 - src/separate_chaining/table.c | 50 ++++---- src/separate_chaining/table.h | 7 +- src/tinyhash.c | 17 +-- src/tinyhash.h | 8 +- tests/test_table.c | 51 +++++--- tests/tests.c | 17 ++- 19 files changed, 361 insertions(+), 82 deletions(-) create mode 100644 src/common/table.h create mode 100644 src/open_addressing/CMakeLists.txt create mode 100644 src/open_addressing/entry.h create mode 100644 src/open_addressing/table.c create mode 100644 src/open_addressing/table.h diff --git a/README.md b/README.md index b0df514..937530a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ This is a library containing multiple C implementations of hashmap. The public A Here are the different methods implemented: - [Separate chaining](./src/separate_chaining/) +- [Open addressing](./src/open_addressing/) ## 📖 Build and run diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1de8d06..6f80368 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ endif () add_subdirectory(common) add_subdirectory(separate_chaining) +add_subdirectory(open_addressing) set (TINYHASH_SRC tinyhash.c @@ -24,7 +25,8 @@ add_library( ${TINYHASH_SRC} $ $ + $ ) -install (TARGETS ${TINYHASH_NAME} DESTINATION lib) +install (TARGETS ${TINYHASH_NAME} DESTINATION lib) install (FILES ${TINYHASH_HEADERS} DESTINATION include/${TINYHASH_NAME}) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 498b432..acd12cb 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -7,6 +7,7 @@ set(COMMON_HEADERS hash.h key.h types.h + table.h ) add_library(common_objects OBJECT ${COMMON_SRC}) diff --git a/src/common/hash.h b/src/common/hash.h index e7753e6..cdedca0 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -1,5 +1,5 @@ -#ifndef __TINYHASH_HASH_H__ -#define __TINYHASH_HASH_H__ +#ifndef __TINYHASH_COMMON_HASH_H__ +#define __TINYHASH_COMMON_HASH_H__ #include #include diff --git a/src/common/key.h b/src/common/key.h index 8f9aa1d..d7276eb 100644 --- a/src/common/key.h +++ b/src/common/key.h @@ -1,5 +1,5 @@ -#ifndef __TINYHASH_KEY_H__ -#define __TINYHASH_KEY_H__ +#ifndef __TINYHASH_COMMON_KEY_H__ +#define __TINYHASH_COMMON_KEY_H__ #include #include diff --git a/src/common/table.h b/src/common/table.h new file mode 100644 index 0000000..0aa95c9 --- /dev/null +++ b/src/common/table.h @@ -0,0 +1,7 @@ +#ifndef __TINYHASH_COMMON_TABLE_H__ +#define __TINYHASH_COMMON_TABLE_H__ + +#define TH_TABLE_NEXT_CAPACITY(capacity) \ + (capacity) == 0 ? 8 : (capacity) * 2 + +#endif diff --git a/src/common/types.h b/src/common/types.h index 8eb07cf..67e856b 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -1,5 +1,5 @@ -#ifndef __TINYHASH_TYPES_H__ -#define __TINYHASH_TYPES_H__ +#ifndef __TINYHASH_COMMON_TYPES_H__ +#define __TINYHASH_COMMON_TYPES_H__ typedef void *th_any_t; diff --git a/src/open_addressing/CMakeLists.txt b/src/open_addressing/CMakeLists.txt new file mode 100644 index 0000000..f423fe3 --- /dev/null +++ b/src/open_addressing/CMakeLists.txt @@ -0,0 +1,15 @@ +set(OPEN_ADDRESSING_SRC + table.c +) + +set(OPEN_ADDRESSING_HEADERS + entry.h + table.h +) + +add_library(open_addressing_objects OBJECT ${OPEN_ADDRESSING_SRC}) + +install( + FILES ${OPEN_ADDRESSING_HEADERS} + DESTINATION include/${TINYHASH_NAME}/open_addressing +) diff --git a/src/open_addressing/entry.h b/src/open_addressing/entry.h new file mode 100644 index 0000000..44d241f --- /dev/null +++ b/src/open_addressing/entry.h @@ -0,0 +1,17 @@ +#ifndef __TINYHASH_OA_ENTRY_H__ +#define __TINYHASH_OA_ENTRY_H__ + +#include +#include +#include + +#include "../common/hash.h" +#include "../common/key.h" + +typedef struct th_oa_entry_s { + th_key_t *key; + th_any_t value; + bool is_tombstone; +} th_oa_entry_t; + +#endif diff --git a/src/open_addressing/table.c b/src/open_addressing/table.c new file mode 100644 index 0000000..d52e8b4 --- /dev/null +++ b/src/open_addressing/table.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include + +#include "table.h" +#include "../common/table.h" +#include "entry.h" + +static bool th_oa_table_put_with_key(th_oa_table_t *table, th_key_t *key, + th_any_t value); + +void th_oa_table_init(th_oa_table_t *table) +{ + table->capacity = 0; + table->count = 0; + table->entries = NULL; +} + +static th_oa_table_t *_th_oa_table_create() +{ + th_oa_table_t *table = malloc(sizeof(th_oa_table_t)); + + if (table == NULL) return NULL; + + th_oa_table_init(table); + + return table; +} + +th_generic_table_t th_oa_table_create() +{ + return (th_generic_table_t) _th_oa_table_create(); +} + +static bool th_oa_table_copy(th_oa_table_t *dest, th_oa_table_t *src) +{ + bool success; + + for (int i = 0; i < src->capacity; i++) { + th_oa_entry_t *entry = &src->entries[i]; + + if (entry->key == NULL) continue; + + success = th_oa_table_put_with_key( + dest, + entry->key, + entry->value + ); + + if (success == false) return false; + } + + return true; +} + +static bool th_oa_table_increase(th_oa_table_t *table) +{ + th_oa_table_t new_table; + bool success; + + th_oa_table_init(&new_table); + + new_table.capacity = TH_TABLE_NEXT_CAPACITY(table->capacity); + + size_t size = sizeof(th_oa_entry_t) * new_table.capacity; + + new_table.entries = malloc(size); + if (new_table.entries == NULL) return false; + + memset(new_table.entries, 0, size); + + success = th_oa_table_copy(&new_table, table); + if (success == false) return false; + + th_oa_table_free(table); + + *table = new_table; + + return true; +} + +static th_oa_entry_t *th_oa_table_find(th_oa_table_t *table, th_key_t *key) +{ + int index = key->hash % table->capacity; + + th_oa_entry_t *tombstone = NULL; + for (;;) { + th_oa_entry_t *entry = &table->entries[index]; + + if (entry->key == NULL) { + if (entry->is_tombstone == false) { + return tombstone != NULL ? tombstone : entry; + } else { + if (tombstone == NULL) tombstone = entry; + } + } else if (th_key_is_equal(key, entry->key) == true) { + return entry; + } + + index = (index + 1) % table->capacity; + } +} + +th_any_t th_oa_table_get(th_generic_table_t generic_table, th_any_t data, + size_t data_size) +{ + th_oa_table_t *table = (th_oa_table_t *) generic_table; + if (table->capacity == 0) return NULL; + + th_key_t key = th_key_create(data, data_size); + th_oa_entry_t *entry = th_oa_table_find(table, &key); + if (entry->key == NULL) return NULL; + + return entry->value; +} + +static bool th_oa_table_put_with_key(th_oa_table_t *table, th_key_t *key, + th_any_t value) +{ + if (table->count >= (table->capacity * TH_OA_LOAD_FACTOR)) { + if (th_oa_table_increase(table) == false) return false; + } + + th_oa_entry_t *entry = th_oa_table_find(table, key); + + if (entry->key == NULL) { + if (entry->is_tombstone == false) table->count++; + } else { + free(entry->key); + } + + entry->key = malloc(sizeof(th_key_t)); + *entry->key = *key; + entry->value = value; + entry->is_tombstone = false; + + return true; +} + +bool th_oa_table_put(th_generic_table_t generic_table, th_any_t data, + size_t data_size, th_any_t value) +{ + th_oa_table_t *table = (th_oa_table_t *) generic_table; + th_key_t key = th_key_create(data, data_size); + + return th_oa_table_put_with_key(table, &key, value); +} + +bool th_oa_table_delete(th_generic_table_t generic_table, th_any_t data, + size_t data_size) +{ + th_oa_table_t *table = (th_oa_table_t *) generic_table; + if (table->capacity == 0) return false; + + th_key_t key = th_key_create(data, data_size); + + th_oa_entry_t *entry = th_oa_table_find(table, &key); + if (entry->key == NULL) return false; + + free(entry->key); + + entry->key = NULL; + entry->is_tombstone = true; + + return true; +} + +void th_oa_table_free(th_generic_table_t generic_table) +{ + th_oa_table_t *table = (th_oa_table_t *) generic_table; + + for (int i = 0; i < table->capacity; i++) { + th_oa_entry_t *entry = &table->entries[i]; + + if (entry->key == NULL) continue; + + free(entry->key); + } + + if (table->entries != NULL) { + free(table->entries); + } +} diff --git a/src/open_addressing/table.h b/src/open_addressing/table.h new file mode 100644 index 0000000..688cad1 --- /dev/null +++ b/src/open_addressing/table.h @@ -0,0 +1,34 @@ +#ifndef __TINYHASH_OA_TABLE_H__ +#define __TINYHASH_OA_TABLE_H__ + +#include +#include +#include + +#include "entry.h" +#include "../common/types.h" + +#define TH_OA_LOAD_FACTOR 0.75 + +typedef struct { + uint32_t count; + uint32_t capacity; + th_oa_entry_t *entries; +} th_oa_table_t; + +void th_oa_table_init(th_oa_table_t *table); + +th_generic_table_t th_oa_table_create(); + +th_any_t th_oa_table_get(th_generic_table_t table, th_any_t data, + size_t data_size); + +bool th_oa_table_put(th_generic_table_t table, th_any_t data, + size_t data_size, th_any_t value); + +void th_oa_table_free(th_generic_table_t table); + +bool th_oa_table_delete(th_generic_table_t table, th_any_t data, + size_t data_size); + +#endif diff --git a/src/separate_chaining/entry.c b/src/separate_chaining/entry.c index 9c36362..968f45d 100644 --- a/src/separate_chaining/entry.c +++ b/src/separate_chaining/entry.c @@ -14,23 +14,16 @@ static th_sc_entry_t *th_sc_entry_new(th_key_t *key, th_any_t value) return entry; } -static void th_sc_entry_raw_add(th_sc_entry_t **root, th_sc_entry_t *entry) -{ - if (*root != NULL) { - (*root)->previous = entry; - } - - entry->next = *root; - *root = entry; -} - bool th_sc_entry_add(th_sc_entry_t **root, th_key_t *key, th_any_t value) { th_sc_entry_t *entry = th_sc_entry_new(key, value); if (entry == NULL) return false; - - th_sc_entry_raw_add(root, entry); + + if (*root != NULL) (*root)->previous = entry; + + entry->next = *root; + *root = entry; return true; } diff --git a/src/separate_chaining/entry.h b/src/separate_chaining/entry.h index ad5de22..0885736 100644 --- a/src/separate_chaining/entry.h +++ b/src/separate_chaining/entry.h @@ -15,7 +15,6 @@ typedef struct th_sc_entry_s { struct th_sc_entry_s *next; } th_sc_entry_t; - bool th_sc_entry_add(th_sc_entry_t **root, th_key_t *key, th_any_t value); #endif diff --git a/src/separate_chaining/table.c b/src/separate_chaining/table.c index 7adc9b3..412dfeb 100644 --- a/src/separate_chaining/table.c +++ b/src/separate_chaining/table.c @@ -4,6 +4,7 @@ #include #include "table.h" +#include "../common/table.h" static bool th_sc_table_put_with_key(th_sc_table_t *table, th_key_t *key, th_any_t value); @@ -31,31 +32,16 @@ th_generic_table_t th_sc_table_create() return (th_generic_table_t) _th_sc_table_create(); } -static bool th_sc_table_increase(th_sc_table_t *table) +static bool th_sc_table_copy(th_sc_table_t *dest, th_sc_table_t *src) { - th_sc_table_t new_table; - - th_sc_table_init(&new_table); + bool success; - // New capacity - new_table.capacity = TH_SC_TABLE_NEXT_CAPACITY(table->capacity); - - // New entry array bytes size - size_t size = sizeof(th_sc_entry_t *) * new_table.capacity; - - new_table.entries = (th_sc_entry_t **) malloc(size); - if (new_table.entries == NULL) return false; - - memset(new_table.entries, 0, size); - - // Re-compute the new index - for (int i = 0; i < table->capacity; i++) { - bool success; - th_sc_entry_t *entry = table->entries[i]; + for (int i = 0; i < src->capacity; i++) { + th_sc_entry_t *entry = src->entries[i]; while (entry != NULL) { success = th_sc_table_put_with_key( - &new_table, + dest, &entry->key, entry->value ); @@ -66,7 +52,29 @@ static bool th_sc_table_increase(th_sc_table_t *table) } } - // Destroy the old table + return true; +} + + +static bool th_sc_table_increase(th_sc_table_t *table) +{ + th_sc_table_t new_table; + bool success; + + th_sc_table_init(&new_table); + + new_table.capacity = TH_TABLE_NEXT_CAPACITY(table->capacity); + + size_t size = sizeof(th_sc_entry_t *) * new_table.capacity; + new_table.entries = malloc(size); + + if (new_table.entries == NULL) return false; + + memset(new_table.entries, 0, size); + + success = th_sc_table_copy(&new_table, table); + if (success == false) return false; + th_sc_table_free((th_generic_table_t) table); *table = new_table; diff --git a/src/separate_chaining/table.h b/src/separate_chaining/table.h index 2189723..f9fcca0 100644 --- a/src/separate_chaining/table.h +++ b/src/separate_chaining/table.h @@ -1,5 +1,5 @@ -#ifndef __TINYHASH_TABLE_H__ -#define __TINYHASH_TABLE_H__ +#ifndef __TINYHASH_SC_TABLE_H__ +#define __TINYHASH_SC_TABLE_H__ #include #include @@ -14,9 +14,6 @@ typedef struct { th_sc_entry_t **entries; } th_sc_table_t; -#define TH_SC_TABLE_NEXT_CAPACITY(capacity) \ - (capacity) == 0 ? 8 : (capacity) * 2 - void th_sc_table_init(th_sc_table_t *table); th_generic_table_t th_sc_table_create(); diff --git a/src/tinyhash.c b/src/tinyhash.c index 9439f7d..83ef40f 100644 --- a/src/tinyhash.c +++ b/src/tinyhash.c @@ -1,5 +1,6 @@ #include "tinyhash.h" #include "./separate_chaining/table.h" +#include "./open_addressing/table.h" static th_funcs_t th_funcs[] = { [TH_SEPARATE_CHAINING] = { @@ -11,20 +12,20 @@ static th_funcs_t th_funcs[] = { }, [TH_OPEN_ADRESSING] = { - .create = NULL, - .get = NULL, - .put = NULL, - ._delete = NULL, - ._free = NULL, + .create = th_oa_table_create, + .get = th_oa_table_get, + .put = th_oa_table_put, + ._delete = th_oa_table_delete, + ._free = th_oa_table_free, }, }; -th_t th_create(th_kind_t kind) +th_t th_create(th_method_t method) { - th_funcs_t funcs = th_funcs[kind]; + th_funcs_t funcs = th_funcs[method]; return (th_t) { - .kind = kind, + .method = method, .funcs = funcs, .table = funcs.create(), }; diff --git a/src/tinyhash.h b/src/tinyhash.h index 1aa1919..20dbcaf 100644 --- a/src/tinyhash.h +++ b/src/tinyhash.h @@ -9,7 +9,7 @@ typedef enum { TH_SEPARATE_CHAINING, TH_OPEN_ADRESSING, -} th_kind_t; +} th_method_t; typedef th_generic_table_t (*th_create_func_t)(void); @@ -30,12 +30,12 @@ typedef struct { } th_funcs_t; typedef struct { - th_kind_t kind; + th_method_t method; th_funcs_t funcs; - void *table; + th_generic_table_t table; } th_t; -th_t th_create(th_kind_t kind); +th_t th_create(th_method_t method); th_t th_create_default(); diff --git a/tests/test_table.c b/tests/test_table.c index 93ffda2..33e3abe 100644 --- a/tests/test_table.c +++ b/tests/test_table.c @@ -4,6 +4,8 @@ #include "../src/tinyhash.h" #include "munit/munit.h" +#include "../src/open_addressing/table.h" + typedef struct { uint32_t a; uint8_t b[22]; @@ -11,13 +13,25 @@ typedef struct { #define SET_GET_ITERATIONS 256 * 256 +static th_method_t str_to_method(const char *str) +{ + switch (*str) { + case 'o': return TH_OPEN_ADRESSING; + case 's': return TH_SEPARATE_CHAINING; + } + + return TH_SEPARATE_CHAINING; +} + MunitResult test_th_put_and_get(const MunitParameter params[], void* data) { th_any_t value; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); th_put(&th, "hello", strlen("hello"), (th_any_t) 333); + value = th_get(&th, "hello", strlen("hello")); munit_assert_uint64((uint64_t) value, ==, 333); @@ -30,10 +44,12 @@ MunitResult test_th_put_and_get(const MunitParameter params[], void* data) MunitResult test_th_get_with_empty_table(const MunitParameter params[], void* data) { th_any_t value; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); value = th_get(&th, "hello", strlen("hello")); + munit_assert_null(value); th_free(&th); @@ -44,8 +60,9 @@ MunitResult test_th_put_with_full_table(const MunitParameter params[], void* dat { int *value; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); for (int i = 0; i < SET_GET_ITERATIONS; i++) { int *j = malloc(sizeof(i)); @@ -74,7 +91,6 @@ MunitResult test_th_put_with_full_table(const MunitParameter params[], void* dat th_put(&th, "hb", strlen("hb"), (th_any_t) 1); th_put(&th, "azdaz", strlen("azdaz"), (th_any_t) 10); - value = th_get(&th, "azdaz", strlen("azdaz")); munit_assert_int((uint64_t) value, ==, 10); @@ -87,8 +103,9 @@ MunitResult test_th_put_with_full_table(const MunitParameter params[], void* dat MunitResult test_th_put_overwrite(const MunitParameter params[], void* data) { th_any_t value; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); th_put(&th, "a", strlen("a"), (th_any_t) 1); th_put(&th, "b", strlen("b"), (th_any_t) 1); @@ -106,8 +123,9 @@ MunitResult test_th_put_overwrite(const MunitParameter params[], void* data) MunitResult test_th_put_collision(const MunitParameter params[], void* data) { th_any_t value; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); th_put(&th, "b", strlen("b"), (th_any_t) 123); th_put(&th, "ello", strlen("ello"), (th_any_t) 456); @@ -132,8 +150,9 @@ MunitResult test_th_put_struct_as_key(const MunitParameter params[], void* data) test_struct.b[3] = 222; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); th_put(&th, &test_struct, sizeof(TestStruct), (th_any_t) 123); @@ -152,8 +171,9 @@ MunitResult test_th_delete(const MunitParameter params[], void* data) th_any_t value; bool ok; - th_kind_t kind = (uint64_t) munit_parameters_get(params, "kind"); - th_t th = th_create(kind); + const char *method_str = munit_parameters_get(params, "method"); + th_method_t method = str_to_method(method_str); + th_t th = th_create(method); char *keys[] = { "a", @@ -180,6 +200,7 @@ MunitResult test_th_delete(const MunitParameter params[], void* data) ok = th_delete(&th, "a", strlen("a")); munit_assert_true(ok); + ok = th_delete(&th, "a", strlen("a")); munit_assert_false(ok); ok = th_delete(&th, "ello", strlen("ello")); diff --git a/tests/tests.c b/tests/tests.c index fb30be7..1b8ff41 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -2,14 +2,13 @@ #include "test_hash.h" #include "test_table.h" -#include "../src/tinyhash.h" -static char *th_kinds[] = { - (char *) TH_SEPARATE_CHAINING, NULL +static char *th_methods[] = { + (char *) "separate_chaining", (char *) "open_adressing", NULL }; static MunitParameterEnum th_params[] = { - { "kind", th_kinds }, + { "method", th_methods }, { NULL, NULL }, }; @@ -47,7 +46,7 @@ static MunitTest test_th_suite_tests[] = { NULL }, { - "/th_set_and_get", + "/th_put_and_get", test_th_put_and_get, NULL, NULL, @@ -60,10 +59,10 @@ static MunitTest test_th_suite_tests[] = { NULL, NULL, MUNIT_TEST_OPTION_NONE, - NULL + th_params }, { - "/th_set_with_full_table", + "/th_put_with_full_table", test_th_put_with_full_table, NULL, NULL, @@ -71,7 +70,7 @@ static MunitTest test_th_suite_tests[] = { th_params }, { - "/th_set_overwrite", + "/th_put_overwrite", test_th_put_overwrite, NULL, NULL, @@ -79,7 +78,7 @@ static MunitTest test_th_suite_tests[] = { th_params }, { - "/th_set_collision", + "/th_put_collision", test_th_put_collision, NULL, NULL,