Skip to content

Commit

Permalink
src: add data structures and algorithms directory
Browse files Browse the repository at this point in the history
  • Loading branch information
danbev committed Aug 24, 2024
1 parent ea8ff2e commit eaa5582
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 0 deletions.
1 change: 1 addition & 0 deletions fundamentals/datastructures/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bin
22 changes: 22 additions & 0 deletions fundamentals/datastructures/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Compiler used for every example program.
CC = gcc
# Directory holding one standalone .c program per example.
SRCDIR = src
# Directory that receives the compiled binaries.
BINDIR = bin

# Every src/<name>.c file becomes a target called <name>.
SOURCES := $(wildcard $(SRCDIR)/*.c)
TARGETS := $(patsubst $(SRCDIR)/%.c, %, $(SOURCES))

.PHONY: all clean

# Default goal: build every example.
all: $(TARGETS)

# Static pattern rule: <name> is compiled from src/<name>.c into bin/<name>.
# bindir is an order-only prerequisite (after the '|'), so the output directory
# is created first without its timestamp forcing a rebuild.
# NOTE(review): the recipe writes to bin/<name>, not to the target path <name>,
# so make presumably never finds the target file and recompiles on every run.
$(TARGETS): % : $(SRCDIR)/%.c | bindir
$(CC) -o ${BINDIR}/$@ $<

# Alias target that depends on the output directory.
bindir: bin

# Create the output directory if it does not exist yet.
bin:
@mkdir -p $(BINDIR)

# Remove the output directory and everything in it.
clean:
${RM} -rf $(BINDIR)

8 changes: 8 additions & 0 deletions fundamentals/datastructures/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Data Structures and Algorithms
This directory contains standalone examples of data structures and algorithms
used in AI/ML which I felt were interesting and wanted to explore further to
understand ML inference engines and frameworks better.

### Data Structures
* [Trie](src/trie.c) (Prefix Tree/Digital Tree)
* [Double Array Trie](src/dat.c)
114 changes: 114 additions & 0 deletions fundamentals/datastructures/src/dat.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

#define NUM_CHARS 26 // Alphabet size: one transition per lowercase letter 'a'..'z'
#define INITIAL_SIZE 100 // Initial number of elements allocated for each of the base/check/terminal arrays
#define ROOT_NODE 1 // Index of the root node; index 0 stays unused because 0 marks an empty base/check entry

/*
 * State of a double-array trie.
 *
 * base, check and terminal are parallel arrays: element i of each one
 * describes the same slot (node) i.
 */
typedef struct {
    int *base;      // base[s]: offset added to a character code to find the children of s; 0 = not assigned yet
    int *check;     // check[t]: index of the node that owns slot t as a child; 0 = slot t is unclaimed
    bool *terminal; // terminal[s]: true when an inserted word ends at node s
    int size;       // starts at ROOT_NODE and grows by one for every slot claimed during insertion
    int capacity;   // number of elements currently allocated in each of the three arrays
} trie_dat;

// Allocate and initialize an empty double-array trie.
//
// All three arrays are zero-filled (calloc), which is what marks every slot
// as "base not assigned" / "unclaimed" / "not terminal".
//
// Returns a heap-allocated trie that the caller must release by freeing
// base, check, terminal and the trie itself. Never returns NULL: if any
// allocation fails an error is written to stderr and the process exits.
trie_dat* create_trie(void) {
    trie_dat* trie = malloc(sizeof *trie);
    if (trie == NULL) {
        fprintf(stderr, "Error: Unable to allocate memory for trie\n");
        exit(1);
    }

    trie->size = ROOT_NODE;
    trie->capacity = INITIAL_SIZE;
    trie->base = calloc((size_t)trie->capacity, sizeof *trie->base);
    trie->check = calloc((size_t)trie->capacity, sizeof *trie->check);
    trie->terminal = calloc((size_t)trie->capacity, sizeof *trie->terminal);

    if (trie->base == NULL || trie->check == NULL || trie->terminal == NULL) {
        fprintf(stderr, "Error: Unable to allocate memory for trie arrays\n");
        // free(NULL) is a no-op, so whichever allocations succeeded are released.
        free(trie->base);
        free(trie->check);
        free(trie->terminal);
        free(trie);
        exit(1);
    }

    return trie;
}

// Grow one of the trie's arrays from old_count to new_count elements and
// zero-fill the newly added tail. Exits the process if realloc fails, so the
// returned pointer is never NULL.
static void* grow_zeroed(void* ptr, size_t old_count, size_t new_count, size_t elem_size) {
    void* grown = realloc(ptr, new_count * elem_size);
    if (grown == NULL) {
        fprintf(stderr, "Error: Unable to grow trie arrays to %zu slots\n", new_count);
        exit(1);
    }
    // realloc does not initialize the added region, but the trie relies on
    // 0 meaning "empty" in base/check and false in terminal.
    memset((char*)grown + old_count * elem_size, 0, (new_count - old_count) * elem_size);
    return grown;
}

// Make sure `index` is a valid position in base, check and terminal.
//
// The capacity is doubled until it exceeds `index`; the arrays are then
// reallocated once and the new slots are zeroed. Does nothing when `index`
// is already below the current capacity (this includes negative values).
void ensure_capacity(trie_dat* trie, int index) {
    if (index < trie->capacity) {
        return;
    }

    const int old_capacity = trie->capacity;
    int new_capacity = old_capacity;
    while (index >= new_capacity) {
        new_capacity *= 2;
    }

    const size_t old_count = (size_t)old_capacity;
    const size_t new_count = (size_t)new_capacity;
    trie->base = grow_zeroed(trie->base, old_count, new_count, sizeof *trie->base);
    trie->check = grow_zeroed(trie->check, old_count, new_count, sizeof *trie->check);
    trie->terminal = grow_zeroed(trie->terminal, old_count, new_count, sizeof *trie->terminal);
    trie->capacity = new_capacity;
}

// Insert a word into the trie.
//
// trie: trie to modify.
// word: NUL-terminated string made only of the letters 'a'..'z'. A NULL
//       pointer is ignored; a word containing any other character is
//       rejected with a message on stderr before the trie is touched.
//
// If a transition lands on a slot that already belongs to a different parent
// a conflict is reported on stderr and the insertion is abandoned; nodes
// created for the earlier characters of the word are left in place.
void insert(trie_dat* trie, const char* word) {
    if (word == NULL) {
        return;
    }

    // Validate up front: an offset outside 0..NUM_CHARS-1 would index the
    // arrays out of bounds (negative for anything below 'a').
    for (int i = 0; word[i] != '\0'; i++) {
        int offset = word[i] - 'a';
        if (offset < 0 || offset >= NUM_CHARS) {
            fprintf(stderr, "Error: '%s' contains characters outside 'a'-'z'.\n", word);
            return;
        }
    }

    int cur_node = ROOT_NODE; // The walk always starts at the root node.

    for (int i = 0; word[i] != '\0'; i++) {
        // Map 'a' to 0, 'b' to 1, 'c' to 2, etc.
        int char_offset = word[i] - 'a';

        ensure_capacity(trie, cur_node);
        if (trie->base[cur_node] == 0) {
            // First child of this node: pick the node's base offset. The
            // running node counter is used as a simple source of fresh
            // offsets, e.g. for "cow":
            //   ROOT -> 'c' -> 'o' -> 'w'
            // each node on the path gets the next counter value.
            // The counter starts at ROOT_NODE, so adding 1 keeps every base
            // above ROOT_NODE; otherwise base + 0 ('a') could equal ROOT_NODE
            // and the root would be recorded as its own child.
            trie->base[cur_node] = trie->size + 1;
        }

        // Transition index: base[s] + c.
        int t = trie->base[cur_node] + char_offset;

        ensure_capacity(trie, t);
        if (trie->check[t] == 0) {
            // Slot is unclaimed: it becomes the child of cur_node.
            trie->check[t] = cur_node;
            trie->size++;
        } else if (trie->check[t] != cur_node) {
            // Slot already belongs to another parent.
            fprintf(stderr, "Error: Conflict detected while inserting '%s'.\n", word);
            return;
        }

        cur_node = t;
    }

    // For the empty word this marks the root itself as terminal.
    trie->terminal[cur_node] = true;
}

// Print every word stored below node `s`, one per line, in alphabetical order.
//
// trie:   trie to read.
// s:      node whose subtree is printed.
// prefix: caller-provided scratch buffer holding the letters on the path
//         from the root to `s`; it must have room for the longest word plus
//         the terminating NUL.
// depth:  number of letters currently stored in `prefix`.
void print_trie(const trie_dat* trie, int s, char* prefix, int depth) {
    // A word ends at this node: terminate the buffer and emit it.
    if (trie->terminal[s]) {
        prefix[depth] = '\0';
        printf("%s\n", prefix);
    }

    const int offset = trie->base[s];
    for (char letter = 'a'; letter < 'a' + NUM_CHARS; letter++) {
        const int child = offset + (letter - 'a');
        // Skip slots that are out of range or owned by a different parent.
        if (child >= trie->capacity || trie->check[child] != s) {
            continue;
        }
        prefix[depth] = letter;
        print_trie(trie, child, prefix, depth + 1);
    }
}

// Demo driver: build a trie, insert a sample word, print the stored words
// and release every allocation.
int main() {
    char word_buf[100]; // scratch buffer print_trie uses to assemble each word

    trie_dat* dat = create_trie();

    insert(dat, "cow");
    // Additional sample words, left disabled:
    //insert(dat, "dog");
    //insert(dat, "dad");
    //insert(dat, "cat");

    fputs("Contents of Trie:\n", stdout);
    print_trie(dat, ROOT_NODE, word_buf, 0);

    // The three arrays are separate allocations and go before the struct.
    free(dat->terminal);
    free(dat->check);
    free(dat->base);
    free(dat);

    return 0;
}

83 changes: 83 additions & 0 deletions fundamentals/datastructures/src/trie.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

// Fan-out of every node: one child slot per possible byte value.
#define NUM_CHARS 256

// Node of a byte-indexed trie.
typedef struct trie_node {
    struct trie_node *children[NUM_CHARS]; // children[b]: subtree reached through byte b, or NULL
    bool terminal;                         // true when an inserted string ends at this node
} trie_node;

// Allocate a trie node with no children that is not marked terminal.
// Never returns NULL: on allocation failure an error is written to stderr
// and the process exits with status 1.
trie_node* create_node() {
    trie_node* fresh = malloc(sizeof *fresh);
    if (!fresh) {
        fprintf(stderr, "Error: Unable to allocate memory for trie node\n");
        exit(1);
    }

    // Clear every child link.
    trie_node** slot = fresh->children;
    trie_node** const end = slot + NUM_CHARS;
    while (slot != end) {
        *slot++ = NULL;
    }
    fresh->terminal = false;

    return fresh;
}

// Insert a string into the trie.
//
// root:  address of the root pointer; if *root is NULL a root node is
//        created and stored there.
// value: NUL-terminated string to insert. The empty string is valid and
//        marks the root itself as terminal.
//
// Returns true when the string was added, false when it was already present
// or when root or value is NULL.
bool trie_insert(trie_node** root, const char* value) {
    if (root == NULL || value == NULL) {
        return false;
    }
    if (*root == NULL) {
        *root = create_node();
    }

    // char may be signed, so the bytes are read as unsigned char to keep
    // every child index in 0..255.
    const unsigned char* cursor = (const unsigned char*) value;

    // Walk up to the terminator instead of storing strlen() in an int, which
    // would truncate the length of very long strings.
    trie_node* current = *root;
    for (; *cursor != '\0'; cursor++) {
        trie_node** link = &current->children[*cursor];
        // Create the node for this byte if it does not exist yet.
        if (*link == NULL) {
            *link = create_node();
        }
        current = *link;
    }

    if (current->terminal) {
        return false;
    }
    current->terminal = true;
    return true;
}

// Print every string stored in the trie, one per line, in ascending byte order.
//
// root:   node whose subtree is printed; when NULL, "Trie is empty" is
//         printed instead.
// prefix: caller-provided scratch buffer holding the bytes on the path to
//         `root`; it must have room for the longest string plus the NUL.
// depth:  number of bytes currently stored in `prefix`.
void print_trie(trie_node* root, char* prefix, int depth) {
    if (!root) {
        printf("Trie is empty\n");
        return;
    }

    // A string ends at this node: terminate the buffer and emit it.
    if (root->terminal) {
        prefix[depth] = '\0';
        printf("%s\n", prefix);
    }

    for (int byte = 0; byte < NUM_CHARS; byte++) {
        trie_node* child = root->children[byte];
        if (child == NULL) {
            continue;
        }
        prefix[depth] = byte;
        print_trie(child, prefix, depth + 1);
    }
}

// Release a node and, recursively, every node reachable from it.
// Accepts NULL, in which case nothing happens.
static void free_trie(trie_node* node) {
    if (node == NULL) {
        return;
    }
    for (int i = 0; i < NUM_CHARS; i++) {
        free_trie(node->children[i]);
    }
    free(node);
}

// Demo driver: insert a few sample words, print the stored strings and
// release the trie. Command-line arguments are not used.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    printf("Trie data structure example\n");
    trie_node* root = NULL;
    trie_insert(&root, "cow");
    trie_insert(&root, "dog");
    trie_insert(&root, "dad");
    trie_insert(&root, "cat");

    char prefix[100]; // scratch buffer print_trie uses to assemble each word
    print_trie(root, prefix, 0);

    // Every node was heap-allocated by create_node; free them before exit.
    free_trie(root);
    root = NULL;

    return 0;
}

0 comments on commit eaa5582

Please sign in to comment.