Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions include/prism/util/pm_constant_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,23 @@ void pm_constant_id_list_free(pm_constant_id_list_t *list);
// A single slot in the constant pool's hash table. A bucket is in use when its
// id is non-zero; that id, minus one, is the index of the corresponding entry
// in the pool's constants array.
typedef struct {
// The id of the constant stored in this bucket, or 0 if the bucket is empty.
unsigned int id: 31;
// Whether the pool owns the constant's content and therefore has to free it
// when the pool itself is freed.
bool owned: 1;
// The hash of the constant's content. It is kept here so that the bucket can
// be re-slotted when the table is resized without hashing the content again.
uint32_t hash;
} pm_constant_pool_bucket_t;

// The content of a single constant, described as a span of bytes. The constant
// with id N lives at index N - 1 of the pool's constants array.
typedef struct {
// A pointer to the first byte of the constant's content.
const uint8_t *start;
// The number of bytes of content starting at `start`.
size_t length;
// NOTE(review): presumably the hash of the content; the hash table code
// reads the hash from the bucket instead, so confirm this field is still
// needed here.
uint32_t hash;
} pm_constant_t;

// A pool of unique constants. It consists of a hash table of buckets used to
// find a constant by content, and a densely packed array of constants that is
// indexed by id - 1.
typedef struct {
// The hash table buckets. There are `capacity` of them, and this pointer is
// the start of the single allocation that backs the whole pool.
pm_constant_pool_bucket_t *buckets;
// The constants, stored in the order their ids were allocated. This array
// lives in the same allocation as `buckets`, directly after them, so it must
// not be freed on its own.
pm_constant_t *constants;
// The number of constants that have been inserted, which is also the most
// recently allocated id.
uint32_t size;
// The number of buckets in the hash table. Initialization rounds this to a
// power of two so that `capacity - 1` can be used as an index mask.
uint32_t capacity;
} pm_constant_pool_t;

// Define an empty constant pool.
#define PM_CONSTANT_POOL_EMPTY ((pm_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
#define PM_CONSTANT_POOL_EMPTY ((pm_constant_pool_t) { .buckets = NULL, .constants = NULL, .size = 0, .capacity = 0 })

// Initialize a new constant pool with a given capacity.
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
Expand Down
9 changes: 2 additions & 7 deletions rust/prism/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,13 +551,8 @@ impl<'pr> ConstantId<'pr> {{
pub fn as_slice(&self) -> &'pr [u8] {{
unsafe {{
let pool = &(*self.parser.as_ptr()).constant_pool;
for i in 0..pool.capacity {{
let constant = &(*pool.constants.add(i.try_into().unwrap()));
if constant.id() == self.id {{
return std::slice::from_raw_parts(constant.start, constant.length);
}}
}}
panic!("Unable to locate constant id");
let constant = &(*pool.constants.add((self.id - 1).try_into().unwrap()));
std::slice::from_raw_parts(constant.start, constant.length)
}}
}}
}}
Expand Down
74 changes: 48 additions & 26 deletions src/util/pm_constant_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,34 +93,45 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
if (next_capacity < pool->capacity) return false;

const uint32_t mask = next_capacity - 1;
pm_constant_t *next_constants = calloc(next_capacity, sizeof(pm_constant_t));
if (next_constants == NULL) return false;
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);

// For each constant in the current constant pool, rehash the content, find
// the index in the next constant pool, and insert it.
void *next = calloc(next_capacity, element_size);
if (next == NULL) return false;

pm_constant_pool_bucket_t *next_buckets = next;
pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));

// For each bucket in the current constant pool, find the index in the
// next constant pool, and insert it.
for (uint32_t index = 0; index < pool->capacity; index++) {
pm_constant_t *constant = &pool->constants[index];
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];

// If an id is set on this constant, then we know we have content here.
// In this case we need to insert it into the next constant pool.
if (constant->id != 0) {
uint32_t next_index = constant->hash & mask;
if (bucket->id != 0) {
uint32_t next_index = bucket->hash & mask;

// This implements linear scanning to find the next available slot
// in case this index is already taken. We don't need to bother
// comparing the values since every constant already in the pool is
// known to be unique.
while (next_constants[next_index].id != 0) {
while (next_buckets[next_index].id != 0) {
next_index = (next_index + 1) & mask;
}

// Here we copy over the entire constant, which includes the id so
// Here we copy over the entire bucket, which includes the id so
// that they are consistent between resizes.
next_constants[next_index] = *constant;
next_buckets[next_index] = *bucket;
}
}

free(pool->constants);
// The constants are stable with respect to hash table resizes.
memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));

// pool->constants and pool->buckets are allocated out of the same chunk
// of memory, with the buckets coming first.
free(pool->buckets);
pool->constants = next_constants;
pool->buckets = next_buckets;
pool->capacity = next_capacity;
return true;
}
Expand All @@ -132,9 +143,12 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
if (capacity >= ((maximum / 2) + 1)) return false;

capacity = next_power_of_two(capacity);
pool->constants = calloc(capacity, sizeof(pm_constant_t));
if (pool->constants == NULL) return false;
const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
void *memory = calloc(capacity, element_size);
if (memory == NULL) return false;

pool->buckets = memory;
pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
pool->size = 0;
pool->capacity = capacity;
return true;
Expand All @@ -152,12 +166,14 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l

uint32_t hash = pm_constant_pool_hash(start, length);
uint32_t index = hash & mask;
pm_constant_t *constant;
pm_constant_pool_bucket_t *bucket;

while (constant = &pool->constants[index], constant->id != 0) {
while (bucket = &pool->buckets[index], bucket->id != 0) {
// If there is a collision, then we need to check if the content is the
// same as the content we are trying to insert. If it is, then we can
// return the id of the existing constant.
pm_constant_t *constant = &pool->constants[bucket->id - 1];

if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
// Since we have found a match, we need to check if this is
// attempting to insert a shared or an owned constant. We want to
Expand All @@ -168,33 +184,38 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
// memory. Either it's duplicated with the existing constant or
// it's not necessary because we have a shared version.
free((void *) start);
} else if (constant->owned) {
} else if (bucket->owned) {
// If we're attempting to insert a shared constant and the
// existing constant is owned, then we can free the owned
// constant and replace it with the shared constant.
free((void *) constant->start);
constant->start = start;
constant->owned = false;
bucket->owned = false;
}

return constant->id;
return bucket->id;
}

index = (index + 1) & mask;
}

pool->size++;
// IDs are allocated starting at 1, since the value 0 denotes a non-existent
// constant.
uint32_t id = ++pool->size;
assert(pool->size < ((uint32_t) (1 << 31)));

*constant = (pm_constant_t) {
.id = (unsigned int) (pool->size & 0x7FFFFFFF),
*bucket = (pm_constant_pool_bucket_t) {
.id = (unsigned int) (id & 0x7FFFFFFF),
.owned = owned,
.hash = hash
};

pool->constants[id - 1] = (pm_constant_t) {
.start = start,
.length = length,
.hash = hash
};

return constant->id;
return id;
}

// Insert a constant into a constant pool. Returns the id of the constant, or 0
Expand All @@ -218,13 +239,14 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
// For each constant in the current constant pool, free the contents if the
// contents are owned.
for (uint32_t index = 0; index < pool->capacity; index++) {
pm_constant_t *constant = &pool->constants[index];
pm_constant_pool_bucket_t *bucket = &pool->buckets[index];

// If an id is set on this constant, then we know we have content here.
if (constant->id != 0 && constant->owned) {
if (bucket->id != 0 && bucket->owned) {
pm_constant_t *constant = &pool->constants[bucket->id - 1];
free((void *) constant->start);
}
}

free(pool->constants);
free(pool->buckets);
}
9 changes: 3 additions & 6 deletions templates/ext/prism/api_node.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,9 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
VALUE source = pm_source_new(parser, encoding);
ID *constants = calloc(parser->constant_pool.size, sizeof(ID));

for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
pm_constant_t constant = parser->constant_pool.constants[index];

if (constant.id != 0) {
constants[constant.id - 1] = rb_intern3((const char *) constant.start, constant.length, encoding);
}
for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
pm_constant_t *constant = &parser->constant_pool.constants[index];
constants[index] = rb_intern3((const char *) constant->start, constant->length, encoding);
}

pm_node_stack_node_t *node_stack = NULL;
Expand Down
10 changes: 5 additions & 5 deletions templates/src/serialize.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,16 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
offset = buffer->length;
pm_buffer_append_zeroes(buffer, parser->constant_pool.size * 8);

pm_constant_t *constant;
for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
constant = &parser->constant_pool.constants[index];
pm_constant_pool_bucket_t *bucket = &parser->constant_pool.buckets[index];

// If we find a constant at this index, serialize it at the correct
// index in the buffer.
if (constant->id != 0) {
size_t buffer_offset = offset + ((((size_t) constant->id) - 1) * 8);
if (bucket->id != 0) {
pm_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1];
size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8);

if (constant->owned) {
if (bucket->owned) {
// Since this is an owned constant, we are going to write its
// contents into the buffer after the constant pool. So
// effectively in place of the source offset, we have a buffer
Expand Down