Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tag to representation of str objects - Fix emission of string literals in the presence of escape sequence #13

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 56 additions & 15 deletions btl/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,37 @@
#include <stdlib.h>
#include <string.h>

#define TAG_SIZE sizeof(char *)

/* Runtime representation of a Tiger string.
 *
 * The field order is significant: string objects emitted by the compiler are
 * laid out as a tag pointer, then the length, then the character bytes. */
struct tigerstr {
/* Points to a tag string: "s" marks strings allocated at runtime that the GC
 * should handle; "!s" marks constant strings that must not be collected. */
char *tag;
/* Number of bytes stored in `chars`. */
int64_t length;
/* Declared with a single element, but heap-allocated strings reserve room
 * for `length` bytes after the header, so this is indexed past chars[0]. */
unsigned char chars[1];
};

/* Runtime representation of a Tiger array, as built by `initArray`. */
struct tigerarray {
/* Tag string: "ap" when the array was created with a non-zero `is_ptr`
 * argument, "an" otherwise.
 * NOTE(review): presumably lets the GC tell pointer elements from plain
 * integers -- confirm against the collector. */
char *tag;
/* Number of elements in `data`. */
int64_t length;
/* Declared with a single element; `initArray` allocates room for the
 * requested number of 64-bit slots. */
int64_t data[1];
};

/* Runtime representation of a Tiger record, as built by `allocRecord`. */
struct tigerrecord {
/* Caller-supplied tag string; `allocRecord` uses its strlen as the number
 * of fields.
 * NOTE(review): presumably one character per field, describing whether the
 * field holds a pointer -- confirm against the code that builds the tag. */
char *tag;
/* Declared with a single element; `allocRecord` allocates one 64-bit slot
 * per character of `tag` and zero-initialises each of them. */
int64_t data[1];
};

/* Main function generated by compiler that will be linked with the runtime
* library. */
extern int tigermain(int);

struct tigerstr consts[256];
struct tigerstr empty = {0, ""};
/* The empty string carries the special "!s" tag because it is a constant
 * that must never be garbage collected. */
struct tigerstr empty = {"!s", 0, ""};

int main() {
for (int i = 0; i < 256; i++) {
consts[i].tag = "!s";
consts[i].length = 1;
consts[i].chars[0] = i;
}
Expand All @@ -30,23 +46,43 @@ void print(struct tigerstr *s) {
putchar(*p);
}

int64_t *initArray(int64_t size, int64_t init) {
int i;
int64_t *a = (int64_t *)malloc(size * sizeof(int64_t));
struct tigerarray *initArray(int64_t size, int64_t init, int64_t is_ptr) {
/* First slot is the tag, followed by the size of the array and then
* the data. */
struct tigerarray *a = malloc(TAG_SIZE + (size + 1) * sizeof(int64_t));
if (a == NULL) {
fprintf(stderr, "Failed to allocate memory for `initArray`.\n");
exit(1);
}

if (is_ptr) {
a->tag = "ap";
} else {

a->tag = "an";
}

a->length = size;

for (int i = 0; i < size; i++)
a[i] = init;
a->data[i] = init;

return a;
}

int *allocRecord(int size) {
int i;
int *p, *a;
p = a = (int *)malloc(size);
struct tigerrecord *allocRecord(char *tag) {
int size = strlen(tag);
struct tigerrecord *a = malloc(sizeof(char *) + size * sizeof(int64_t));

if (a == NULL) {
fprintf(stderr, "Failed to allocate memory for `allocRecord`.\n");
exit(1);
}

for (i = 0; i < size; i += sizeof(int))
*p++ = 0;
a->tag = tag;

for (int i = 0; i < size; i++)
a->data[i] = 0;

return a;
}
Expand Down Expand Up @@ -77,24 +113,27 @@ int64_t size(const struct tigerstr *s) { return s->length; }
/* Substring of string `s`, starting with character `first`, `n` characters
* long. Characters are numbered starting at 0. */
struct tigerstr *substring(const struct tigerstr *s, int64_t first, int64_t n) {
int substr_len;
int64_t s_len = s->length;

// Ensure `first` and `n` are within bounds.
if (first < 0 || first + n > s->length) {
fprintf(stderr, "substring([%lld],%lld,%lld) out of range\n", s->length, first, n);
fprintf(stderr, "substring([%lld],%lld,%lld) out of range\n", s->length,
first, n);
exit(1);
}

if (n == 1)
return consts + s->chars[first];
else {
struct tigerstr *t = malloc(sizeof(int64_t) + n);
struct tigerstr *t = malloc(TAG_SIZE + sizeof(int64_t) + n);
if (t == NULL) {
fprintf(stderr, "Failed to allocate memory for `substring`.\n");
exit(1);
}

/* This string needs to be handled by the GC, hence we give it the
* 's' tag */
t->tag = "s";
t->length = n;

for (int i = 0; i < n; i++)
Expand All @@ -112,12 +151,14 @@ struct tigerstr *concat(struct tigerstr *a, struct tigerstr *b) {
else {
int n = a->length + b->length;

struct tigerstr *t = malloc(sizeof(int) + n);
struct tigerstr *t = malloc(TAG_SIZE + sizeof(int) + n);
if (t == NULL) {
fprintf(stderr, "Failed to allocate memory for `substring`.\n");
exit(1);
}

/* GC needs to handle this eventually, hence we give it the 's' tag */
t->tag = "s";
t->length = n;

for (int i = 0; i < a->length; i++)
Expand Down
3 changes: 2 additions & 1 deletion lib/codegen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,8 @@ module RiscVGen : CODEGEN = struct

let generateFrag = function
| Frame.PROC { body; frame } -> generateFunctionStm body frame
| Frame.STRING (lab, str) -> Frame.string lab str
| Frame.STRING (lab, str) -> Frame.string_obj lab str
| Frame.STRING_LIT (lab, str) -> Frame.string_lit lab str

let%test_unit "test_codegen_test_files" =
let test_dir = "../../../tests/codegen/" in
Expand Down
3 changes: 2 additions & 1 deletion lib/driver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ let generateFrag f =
in
match f with
| Frame.PROC { body; frame } -> generateFunctionStm body frame
| Frame.STRING (lab, str) -> Frame.string lab str
| Frame.STRING (lab, str) -> Frame.string_obj lab str
| Frame.STRING_LIT (lab, str) -> Frame.string_lit lab str

let compile_channel ?filename chan =
let absyn = Parser.parse_channel ?filename chan in
Expand Down
43 changes: 29 additions & 14 deletions lib/frame.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ module type FRAME = sig
type frag =
| PROC of { body : Tree.stm; frame : frame }
| STRING of Temp.label * string
| STRING_LIT of Temp.label * string

val new_frame : new_frame_args -> frame
val name : frame -> Temp.label
Expand Down Expand Up @@ -76,7 +77,8 @@ module type FRAME = sig
val fn_prolog_epilog_to_string :
fn_prolog_epilog -> register_map:register Temp.tbl -> string

val string : Symbol.symbol -> string -> string
val string_obj : Symbol.symbol -> string -> string
val string_lit : Symbol.symbol -> string -> string
end

(* TODO: Implement this if we really want to target the x86 architecture *)
Expand Down Expand Up @@ -192,6 +194,7 @@ module RiscVFrame : FRAME = struct
type frag =
| PROC of { body : Tree.stm; frame : frame }
| STRING of Temp.label * string
| STRING_LIT of Temp.label * string

let word_size = 8

Expand Down Expand Up @@ -445,30 +448,42 @@ module RiscVFrame : FRAME = struct
in
{ prolog; body; epilog }

(* Instruction to generate a string with a label. *)
let string label str =
(* Instruction to generate a string literal with a label. *)
let string_lit label str =
Printf.sprintf "\t.section\t.rodata\n\t.align\t3\n%s:\n\t.string\t\"%s\"\n"
(Symbol.name label) str

(* Instruction to generate a string object with a label. *)
let string_obj label str =
let label = Symbol.name label in
let strlen = String.length str in
(* We are aligning this to 8, i.e. .align 3 (power of 2)*)
let tag_label = Temp.new_label () in
let char_list = String.to_list @@ Stdlib.Scanf.unescaped str in
let tag_code = string_lit tag_label "!s" in
let strlen = List.length char_list in
(* We are aligning this to 8, i.e. .align 3 (power of 2) *)
let padding = 8 - (strlen % 8) in
(* +8 because the first 8 bytes is used for the string length *)
(* +8 because the first 8 bytes are used for the pointer to the tag *)
(* +8 because the next 8 bytes are used for the string length *)
(* `!s` is the tag that the runtime uses for string literals, i.e.
strings that never need to be GC-ed. For convenience, we emit a fresh
copy of the tag literal for every string object; this should be
optimised, e.g. by sharing a single tag literal. *)
let preamble =
Printf.sprintf
" \t.globl\t%s\n\
\t.data\n\
"\t.globl\t%s\n\
%s\t.data\n\
\t.align\t3\n\
\t.type\t%s, @object\n\
\t.size\t%s, %d\n\
%s:\n\
\t.dword\t%s\n\
\t.dword\t%d\n"
label label label
(8 + strlen + padding)
label strlen
label tag_code label label
(8 + 8 + strlen + padding)
label (Symbol.name tag_label) strlen
in
let body =
String.to_list @@ Stdlib.Scanf.unescaped str
|> List.map ~f:(fun c ->
Printf.sprintf "\t.byte\t%d" @@ Stdlib.Char.code c)
List.map char_list ~f:(fun c ->
Printf.sprintf "\t.byte\t%d" @@ Stdlib.Char.code c)
|> String.concat ~sep:"\n"
in
let postamble =
Expand Down
9 changes: 7 additions & 2 deletions lib/semant.ml
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,11 @@ module Semant : SEMANT = struct
~init:[] fields
|> List.rev
in
let is_ptr_list =
List.map ~f:(fun (_, ty) -> Types.is_ptr ty) fields
in
{
exp = Translate.recordExp input_field_exps;
exp = Translate.recordExp input_field_exps is_ptr_list;
ty = record_type;
}
| _ ->
Expand Down Expand Up @@ -349,7 +352,9 @@ module Semant : SEMANT = struct
"array initial value expected to be of type %s"
(Types.to_string t) );
{
exp = Translate.arrayExp (size_expty.exp, init_expty.exp);
exp =
Translate.arrayExp
(size_expty.exp, init_expty.exp, Types.is_ptr t);
ty = ty';
}
| _ -> { exp = Translate.default_exp; ty = Types.INT })
Expand Down
Loading