From 2d9e8ef1c5613335ad35cccd3d98b20b7dadae12 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Tue, 19 Sep 2023 13:19:35 -0400 Subject: [PATCH 1/2] shrink `yp_constant_t` by 8 bytes --- include/yarp/util/yp_constant_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/yarp/util/yp_constant_pool.h b/include/yarp/util/yp_constant_pool.h index c07cd0b7d8..a43b9b4740 100644 --- a/include/yarp/util/yp_constant_pool.h +++ b/include/yarp/util/yp_constant_pool.h @@ -42,9 +42,9 @@ void yp_constant_id_list_free(yp_constant_id_list_t *list); typedef struct { unsigned int id: 31; bool owned: 1; + uint32_t hash; const uint8_t *start; size_t length; - uint32_t hash; } yp_constant_t; typedef struct { From 43a25da8a9ea99546f7c822c535a31525168afe8 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Tue, 19 Sep 2023 13:56:49 -0400 Subject: [PATCH 2/2] rearrange the constant pool so IDs can be used for indexing --- include/yarp/util/yp_constant_pool.h | 6 ++- rust/yarp/build.rs | 9 +--- src/util/yp_constant_pool.c | 73 ++++++++++++++++++---------- templates/ext/yarp/api_node.c.erb | 8 ++- templates/src/serialize.c.erb | 10 ++-- 5 files changed, 61 insertions(+), 45 deletions(-) diff --git a/include/yarp/util/yp_constant_pool.h b/include/yarp/util/yp_constant_pool.h index a43b9b4740..00ed009e48 100644 --- a/include/yarp/util/yp_constant_pool.h +++ b/include/yarp/util/yp_constant_pool.h @@ -43,18 +43,22 @@ typedef struct { unsigned int id: 31; bool owned: 1; uint32_t hash; +} yp_constant_pool_bucket_t; + +typedef struct { const uint8_t *start; size_t length; } yp_constant_t; typedef struct { + yp_constant_pool_bucket_t *buckets; yp_constant_t *constants; uint32_t size; uint32_t capacity; } yp_constant_pool_t; // Define an empty constant pool. -#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 }) +#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .buckets = NULL, .constants = NULL, .size = 0, .capacity = 0 }) // Initialize a new constant pool with a given capacity. bool yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity); diff --git a/rust/yarp/build.rs b/rust/yarp/build.rs index 2b190f1caa..613224a521 100644 --- a/rust/yarp/build.rs +++ b/rust/yarp/build.rs @@ -551,13 +551,8 @@ impl<'pr> ConstantId<'pr> {{ pub fn as_slice(&self) -> &'pr [u8] {{ unsafe {{ let pool = &(*self.parser.as_ptr()).constant_pool; - for i in 0..pool.capacity {{ - let constant = &(*pool.constants.add(i.try_into().unwrap())); - if constant.id() == self.id {{ - return std::slice::from_raw_parts(constant.start, constant.length); - }} - }} - panic!("Unable to locate constant id"); + let constant = &(*pool.constants.add((self.id - 1).try_into().unwrap())); + std::slice::from_raw_parts(constant.start, constant.length) }} }} }} diff --git a/src/util/yp_constant_pool.c b/src/util/yp_constant_pool.c index 18376dce82..3a3ea01301 100644 --- a/src/util/yp_constant_pool.c +++ b/src/util/yp_constant_pool.c @@ -93,34 +93,43 @@ yp_constant_pool_resize(yp_constant_pool_t *pool) { if (next_capacity < pool->capacity) return false; const uint32_t mask = next_capacity - 1; - yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t)); - if (next_constants == NULL) return false; - - // For each constant in the current constant pool, rehash the content, find - // the index in the next constant pool, and insert it. + const size_t element_size = sizeof(yp_constant_pool_bucket_t) + sizeof(yp_constant_t); + void *next = calloc(next_capacity, element_size); + if (next == NULL) return false; + yp_constant_pool_bucket_t *next_buckets = next; + yp_constant_t *next_constants = (void *)(((char *)next) + next_capacity * sizeof(yp_constant_pool_bucket_t)); + + // For each bucket in the current constant pool, find the index in the + // next constant pool, and insert it. for (uint32_t index = 0; index < pool->capacity; index++) { - yp_constant_t *constant = &pool->constants[index]; + yp_constant_pool_bucket_t *bucket = &pool->buckets[index]; // If an id is set on this constant, then we know we have content here. // In this case we need to insert it into the next constant pool. - if (constant->id != 0) { - uint32_t next_index = constant->hash & mask; + if (bucket->id != 0) { + uint32_t next_index = bucket->hash & mask; // This implements linear scanning to find the next available slot // in case this index is already taken. We don't need to bother // comparing the values since we know that the hash is unique. - while (next_constants[next_index].id != 0) { + while (next_buckets[next_index].id != 0) { next_index = (next_index + 1) & mask; } - // Here we copy over the entire constant, which includes the id so + // Here we copy over the entire bucket, which includes the id so // that they are consistent between resizes. - next_constants[next_index] = *constant; + next_buckets[next_index] = *bucket; } } - free(pool->constants); + // The constants are stable with respect to hash table resizes. + memcpy(next_constants, pool->constants, pool->size * sizeof(yp_constant_t)); + + // pool->constants and pool->buckets are allocated out of the same chunk + // of memory, with the buckets coming first. + free(pool->buckets); pool->constants = next_constants; + pool->buckets = next_buckets; pool->capacity = next_capacity; return true; } @@ -132,9 +141,12 @@ yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) { if (capacity >= ((maximum / 2) + 1)) return false; capacity = next_power_of_two(capacity); - pool->constants = calloc(capacity, sizeof(yp_constant_t)); - if (pool->constants == NULL) return false; + const size_t element_size = sizeof(yp_constant_pool_bucket_t) + sizeof(yp_constant_t); + void *memory = calloc(capacity, element_size); + if (memory == NULL) return false; + pool->buckets = memory; + pool->constants = (void *)(((char *)memory) + capacity * sizeof(yp_constant_pool_bucket_t)); pool->size = 0; pool->capacity = capacity; return true; @@ -152,12 +164,14 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l uint32_t hash = yp_constant_pool_hash(start, length); uint32_t index = hash & mask; - yp_constant_t *constant; + yp_constant_pool_bucket_t *bucket; - while (constant = &pool->constants[index], constant->id != 0) { + while (bucket = &pool->buckets[index], bucket->id != 0) { // If there is a collision, then we need to check if the content is the // same as the content we are trying to insert. If it is, then we can // return the id of the existing constant. + yp_constant_t *constant = &pool->constants[bucket->id - 1]; + if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { // Since we have found a match, we need to check if this is // attempting to insert a shared or an owned constant. We want to @@ -168,33 +182,37 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l // memory. Either it's duplicated with the existing constant or // it's not necessary because we have a shared version. free((void *) start); - } else if (constant->owned) { + } else if (bucket->owned) { // If we're attempting to insert a shared constant and the // existing constant is owned, then we can free the owned // constant and replace it with the shared constant. free((void *) constant->start); constant->start = start; - constant->owned = false; + bucket->owned = false; } - return constant->id; + return bucket->id; } index = (index + 1) & mask; } - pool->size++; + // IDs are allocated starting at 1, since the value 0 denotes a non-existant + // constant. + uint32_t id = ++pool->size; assert(pool->size < ((uint32_t) (1 << 31))); - *constant = (yp_constant_t) { - .id = (unsigned int) (pool->size & 0x7FFFFFFF), + *bucket = (yp_constant_pool_bucket_t) { + .id = (unsigned int) (id & 0x7FFFFFFF), .owned = owned, + .hash = hash + }; + pool->constants[id - 1] = (yp_constant_t) { .start = start, .length = length, - .hash = hash }; - return constant->id; + return id; } // Insert a constant into a constant pool. Returns the id of the constant, or 0 @@ -218,13 +236,14 @@ yp_constant_pool_free(yp_constant_pool_t *pool) { // For each constant in the current constant pool, free the contents if the // contents are owned. for (uint32_t index = 0; index < pool->capacity; index++) { - yp_constant_t *constant = &pool->constants[index]; + yp_constant_pool_bucket_t *bucket = &pool->buckets[index]; // If an id is set on this constant, then we know we have content here. - if (constant->id != 0 && constant->owned) { + if (bucket->id != 0 && bucket->owned) { + yp_constant_t *constant = &pool->constants[bucket->id - 1]; free((void *) constant->start); } } - free(pool->constants); + free(pool->buckets); } diff --git a/templates/ext/yarp/api_node.c.erb b/templates/ext/yarp/api_node.c.erb index d05e0f9b83..27baa8fa89 100644 --- a/templates/ext/yarp/api_node.c.erb +++ b/templates/ext/yarp/api_node.c.erb @@ -81,12 +81,10 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) { VALUE source = yp_source_new(parser, encoding); ID *constants = calloc(parser->constant_pool.size, sizeof(ID)); - for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) { - yp_constant_t constant = parser->constant_pool.constants[index]; + for (uint32_t index = 0; index < parser->constant_pool.size; index++) { + yp_constant_t *constant = &parser->constant_pool.constants[index]; - if (constant.id != 0) { - constants[constant.id - 1] = rb_intern3((const char *) constant.start, constant.length, encoding); - } + constants[index] = rb_intern3((const char *) constant->start, constant->length, encoding); } yp_node_stack_node_t *node_stack = NULL; diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 13efd8936f..70d3e87d04 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -199,16 +199,16 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) offset = buffer->length; yp_buffer_append_zeroes(buffer, parser->constant_pool.size * 8); - yp_constant_t *constant; for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) { - constant = &parser->constant_pool.constants[index]; + yp_constant_pool_bucket_t *bucket = &parser->constant_pool.buckets[index]; // If we find a constant at this index, serialize it at the correct // index in the buffer. - if (constant->id != 0) { - size_t buffer_offset = offset + ((((size_t) constant->id) - 1) * 8); + if (bucket->id != 0) { + yp_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1]; + size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8); - if (constant->owned) { + if (bucket->owned) { // Since this is an owned constant, we are going to write its // contents into the buffer after the constant pool. So // effectively in place of the source offset, we have a buffer