-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
src: add data structures and algorithms directory
- Loading branch information
Showing
5 changed files
with
228 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
bin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Builds every $(SRCDIR)/*.c file into a standalone executable in $(BINDIR).
#
#   make            build all examples
#   make <name>     build only $(BINDIR)/<name> (e.g. `make trie`)
#   make clean      remove $(BINDIR)
CC = gcc
SRCDIR = src
BINDIR = bin

SOURCES := $(wildcard $(SRCDIR)/*.c)
TARGETS := $(patsubst $(SRCDIR)/%.c,%,$(SOURCES))
BINARIES := $(addprefix $(BINDIR)/,$(TARGETS))

# The short names are aliases rather than files that get created, so they are
# declared phony together with the other non-file targets.
.PHONY: all clean bindir $(TARGETS)

all: $(BINARIES)

# `make <name>` is shorthand for building $(BINDIR)/<name>.
$(TARGETS): % : $(BINDIR)/%

# The rule's target is the file that is actually written, so make can compare
# timestamps and skip binaries that are already up to date. The directory is
# an order-only prerequisite: it must exist, but its mtime never forces a
# rebuild.
$(BINDIR)/%: $(SRCDIR)/%.c | $(BINDIR)
	$(CC) $(CFLAGS) -o $@ $<

bindir: $(BINDIR)

$(BINDIR):
	@mkdir -p $@

clean:
	$(RM) -rf $(BINDIR)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
## Data Structures and Algorithms | ||
This directory contains standalone examples of data structures and algorithms | ||
used in AI/ML which I felt were interesting and wanted to explore further to | ||
understand ML inference engines and frameworks better. | ||
|
||
### Data Structures | ||
* [Trie](src/trie.c) (Prefix Tree/Digital Tree) | ||
* [Double Array Trie](src/dat.c) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <stdbool.h> | ||
#include <string.h> | ||
|
||
#define NUM_CHARS 26 // Number of lowercase letters | ||
#define INITIAL_SIZE 100 // Initial size of the arrays | ||
#define ROOT_NODE 1 // Start from index 1 for root node | ||
|
||
/*
 * Double-array trie kept as three parallel arrays indexed by node number.
 */
typedef struct {
    int *base;      /* base[s] + letter offset gives the slot of s's child; 0 = no base chosen yet */
    int *check;     /* check[t] is the node that claimed slot t; 0 = slot not claimed */
    bool *terminal; /* terminal[s] is true when an inserted word ends at node s */
    int size;       /* starts at ROOT_NODE and grows by one for every slot claimed */
    int capacity;   /* number of elements allocated in each of the three arrays */
} trie_dat;
|
||
// Function to create and initialize the trie | ||
trie_dat* create_trie() { | ||
trie_dat* trie = malloc(sizeof(trie_dat)); | ||
trie->size = ROOT_NODE; | ||
trie->capacity = INITIAL_SIZE; | ||
trie->base = calloc(trie->capacity, sizeof(int)); | ||
trie->check = calloc(trie->capacity, sizeof(int)); | ||
trie->terminal = calloc(trie->capacity, sizeof(bool)); | ||
return trie; | ||
} | ||
|
||
// Resizes one trie array from old_bytes to new_bytes and zero-fills the part
// that was added. Prints an error and exits if the allocation fails.
static void* trie_grow_zeroed(void* ptr, size_t old_bytes, size_t new_bytes) {
    void* grown = realloc(ptr, new_bytes);
    if (grown == NULL) {
        fprintf(stderr, "Error: Unable to grow trie array to %zu bytes\n", new_bytes);
        exit(1);
    }
    // realloc leaves the new tail uninitialised; the trie treats 0/false as
    // "unused", so the tail has to be cleared explicitly.
    memset((char*)grown + old_bytes, 0, new_bytes - old_bytes);
    return grown;
}

// Makes sure `index` is a valid position in base, check and terminal.
//
// The capacity is doubled until it exceeds `index`; every newly added slot is
// zero-initialised so it reads as unused. Does nothing when `index` already
// fits. Exits the program if memory cannot be obtained.
void ensure_capacity(trie_dat* trie, int index) {
    if (index < trie->capacity) {
        return;
    }

    size_t old_capacity = trie->capacity > 0 ? (size_t)trie->capacity : 0;
    // Doubling zero would never terminate, so start from at least one slot.
    int new_capacity = trie->capacity > 0 ? trie->capacity : 1;
    while (index >= new_capacity) {
        new_capacity *= 2;
    }

    trie->base = trie_grow_zeroed(trie->base,
                                  old_capacity * sizeof *trie->base,
                                  (size_t)new_capacity * sizeof *trie->base);
    trie->check = trie_grow_zeroed(trie->check,
                                   old_capacity * sizeof *trie->check,
                                   (size_t)new_capacity * sizeof *trie->check);
    trie->terminal = trie_grow_zeroed(trie->terminal,
                                      old_capacity * sizeof *trie->terminal,
                                      (size_t)new_capacity * sizeof *trie->terminal);
    trie->capacity = new_capacity;
}
|
||
// Inserts `word` into the trie.
//
// Only the lowercase letters 'a'-'z' are supported. A word containing any
// other character is rejected with an error message before the trie is
// touched. A NULL trie or word is ignored.
//
// This is a simplified double-array trie: nodes are never relocated. If a
// node that already has a base needs a slot that belongs to another node, the
// insert stops with a "Conflict detected" error and the nodes created so far
// for this word stay in the trie (without a terminal mark).
void insert(trie_dat* trie, const char* word) {
    if (trie == NULL || word == NULL) {
        return;
    }

    // Validate up front so an unsupported character can neither produce an
    // out-of-range array index nor leave a half-built path behind.
    for (int i = 0; word[i] != '\0'; i++) {
        int offset = word[i] - 'a';
        if (offset < 0 || offset >= NUM_CHARS) {
            fprintf(stderr, "Error: '%s' contains characters outside 'a'-'z'.\n", word);
            return;
        }
    }

    int cur_node = ROOT_NODE; // this is the current node, and we start at the ROOT node.

    for (int i = 0; word[i] != '\0'; i++) {
        // This is basically mapping 'a' to 0, 'b' to 1, 'c' to 2, etc.
        int char_offset = word[i] - 'a';

        ensure_capacity(trie, cur_node);
        if (trie->base[cur_node] == 0) {
            // This node has no children yet, so pick its base now. Start at
            // trie->size (the number of slots claimed so far, counting the
            // root) and move forward until the slot this character maps to
            // is usable. The root is skipped explicitly: its check entry is
            // 0, so it would otherwise look like a free slot.
            int candidate = trie->size;
            for (;;) {
                int slot = candidate + char_offset;
                ensure_capacity(trie, slot);
                if (slot != ROOT_NODE && trie->check[slot] == 0) {
                    break;
                }
                candidate++;
            }
            trie->base[cur_node] = candidate;
        }

        // Calculate the transition index which uses base[s] + c.
        int t = trie->base[cur_node] + char_offset;

        ensure_capacity(trie, t);
        if (t == ROOT_NODE || (trie->check[t] != 0 && trie->check[t] != cur_node)) {
            // The slot is the root itself or already belongs to another
            // parent. Resolving this would require relocating a node's
            // children, which this example does not implement.
            fprintf(stderr, "Error: Conflict detected while inserting '%s'.\n", word);
            return;
        }
        if (trie->check[t] == 0) {
            // First time this transition is used: claim the slot.
            trie->check[t] = cur_node;
            trie->size++;
        }

        cur_node = t;
    }

    trie->terminal[cur_node] = true;
}
|
||
// Recursive function to print the words in the trie | ||
void print_trie(const trie_dat* trie, int s, char* prefix, int depth) { | ||
if (trie->terminal[s]) { | ||
prefix[depth] = '\0'; | ||
printf("%s\n", prefix); | ||
} | ||
|
||
for (int c = 0; c < NUM_CHARS; c++) { | ||
int t = trie->base[s] + c; | ||
if (t < trie->capacity && trie->check[t] == s) { | ||
prefix[depth] = 'a' + c; | ||
print_trie(trie, t, prefix, depth + 1); | ||
} | ||
} | ||
} | ||
|
||
int main() { | ||
trie_dat* trie = create_trie(); | ||
|
||
insert(trie, "cow"); | ||
//insert(trie, "dog"); | ||
//insert(trie, "dad"); | ||
//insert(trie, "cat"); | ||
|
||
char prefix[100]; | ||
printf("Contents of Trie:\n"); | ||
print_trie(trie, ROOT_NODE, prefix, 0); | ||
|
||
free(trie->base); | ||
free(trie->check); | ||
free(trie->terminal); | ||
free(trie); | ||
|
||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <stdbool.h> | ||
|
||
#define NUM_CHARS 256 // (size of a byte) | ||
|
||
typedef struct trie_node { | ||
struct trie_node* children[NUM_CHARS]; | ||
bool terminal; | ||
} trie_node; | ||
|
||
trie_node* create_node() { | ||
trie_node* node = (trie_node*)malloc(sizeof(trie_node)); | ||
if (node == NULL) { | ||
fprintf(stderr, "Error: Unable to allocate memory for trie node\n"); | ||
exit(1); | ||
} | ||
node->terminal = false; | ||
for (int i = 0; i < NUM_CHARS; i++) { | ||
node->children[i] = NULL; | ||
} | ||
return node; | ||
} | ||
|
||
// Inserts the NUL-terminated string `value` into the trie rooted at `*root`.
//
// `*root` may be NULL, in which case the root node is created and stored back
// through `root`. Returns true if the string was newly added. Returns false
// if it was already present or if `root` or `value` is NULL.
bool trie_insert(trie_node** root, const char* value) {
    if (root == NULL || value == NULL) {
        return false;
    }
    if (*root == NULL) {
        *root = create_node();
    }

    trie_node* current = *root;
    // char may be signed, so read the bytes as unsigned char to keep every
    // child index within 0..255.
    for (const unsigned char* p = (const unsigned char*)value; *p != '\0'; p++) {
        // Create a node for the character if it doesn't exist
        if (current->children[*p] == NULL) {
            current->children[*p] = create_node();
        }
        // Step down to the child that was created or already existed
        current = current->children[*p];
    }

    if (current->terminal) {
        return false;
    }
    current->terminal = true;
    return true;
}
|
||
// Prints every string stored at or below `root`, one per line, visiting
// children in ascending byte order.
//
// `prefix` is a caller-supplied scratch buffer holding the bytes on the path
// to `root`; `depth` is how many of them are in use. The buffer is not
// bounds-checked here. A NULL `root` prints "Trie is empty".
void print_trie(trie_node* root, char* prefix, int depth) {
    if (!root) {
        printf("Trie is empty\n");
        return;
    }

    if (root->terminal) {
        prefix[depth] = '\0';
        printf("%s\n", prefix);
    }

    for (int byte = 0; byte < NUM_CHARS; byte++) {
        trie_node* child = root->children[byte];
        if (child == NULL) {
            continue;
        }
        prefix[depth] = byte;
        print_trie(child, prefix, depth + 1);
    }
}
|
||
int main(int argc, char** argv) { | ||
printf("Trie data structure example\n"); | ||
trie_node* root = NULL; | ||
trie_insert(&root, "cow"); | ||
trie_insert(&root, "dog"); | ||
trie_insert(&root, "dad"); | ||
trie_insert(&root, "cat"); | ||
char prefix[100]; | ||
print_trie(root, prefix, 0); | ||
|
||
return 0; | ||
} |