diff --git a/btl/runtime.c b/btl/runtime.c index c314a97..7732ddd 100644 --- a/btl/runtime.c +++ b/btl/runtime.c @@ -2,21 +2,37 @@ #include #include +#define TAG_SIZE sizeof(char *) + struct tigerstr { + char *tag; int64_t length; /* This can be infinitely long when we start allocating strings. */ unsigned char chars[1]; }; +struct tigerarray { + char *tag; + int64_t length; + int64_t data[1]; +}; + +struct tigerrecord { + char *tag; + int64_t data[1]; +}; + /* Main function generated by compiler that will be linked with the runtime * library. */ extern int tigermain(int); struct tigerstr consts[256]; -struct tigerstr empty = {0, ""}; +/* Giving this a special tag as we do not want this to be garbage collected. */ +struct tigerstr empty = {"!s", 0, ""}; int main() { for (int i = 0; i < 256; i++) { + consts[i].tag = "!s"; consts[i].length = 1; consts[i].chars[0] = i; } @@ -30,23 +46,43 @@ void print(struct tigerstr *s) { putchar(*p); } -int64_t *initArray(int64_t size, int64_t init) { - int i; - int64_t *a = (int64_t *)malloc(size * sizeof(int64_t)); +struct tigerarray *initArray(int64_t size, int64_t init, int64_t is_ptr) { + /* First slot is the tag, followed by the size of the array and then + * the data. */ + struct tigerarray *a = malloc(TAG_SIZE + (size + 1) * sizeof(int64_t)); + if (a == NULL) { + fprintf(stderr, "Failed to allocate memory for `initArray`.\n"); + exit(1); + } + + if (is_ptr) { + a->tag = "ap"; + } else { + + a->tag = "an"; + } + + a->length = size; for (int i = 0; i < size; i++) - a[i] = init; + a->data[i] = init; return a; } -int *allocRecord(int size) { - int i; - int *p, *a; - p = a = (int *)malloc(size); +struct tigerrecord *allocRecord(char *tag) { + int size = strlen(tag); + struct tigerrecord *a = malloc(sizeof(char *) + size * sizeof(int64_t)); + + if (a == NULL) { + fprintf(stderr, "Failed to allocate memory for `allocRecord`.\n"); + exit(1); + } - for (i = 0; i < size; i += sizeof(int)) - *p++ = 0; + a->tag = tag; + + for (int i = 0; i < size; i++) + a->data[i] = 0; return a; } @@ -77,24 +113,27 @@ int64_t size(const struct tigerstr *s) { return s->length; } /* Substring of string `s`, starting with character `first`, `n` characters * long. Characters are numbered starting at 0. */ struct tigerstr *substring(const struct tigerstr *s, int64_t first, int64_t n) { - int substr_len; int64_t s_len = s->length; // Ensure `first` and `n` are within bounds. if (first < 0 || first + n > s->length) { - fprintf(stderr, "substring([%lld],%lld,%lld) out of range\n", s->length, first, n); + fprintf(stderr, "substring([%lld],%lld,%lld) out of range\n", s->length, + first, n); exit(1); } if (n == 1) return consts + s->chars[first]; else { - struct tigerstr *t = malloc(sizeof(int64_t) + n); + struct tigerstr *t = malloc(TAG_SIZE + sizeof(int64_t) + n); if (t == NULL) { fprintf(stderr, "Failed to allocate memory for `substring`.\n"); exit(1); } + /* This string needs to be handled by the GC, hence we give it the + * 's' tag */ + t->tag = "s"; t->length = n; for (int i = 0; i < n; i++) @@ -112,12 +151,14 @@ struct tigerstr *concat(struct tigerstr *a, struct tigerstr *b) { else { int n = a->length + b->length; - struct tigerstr *t = malloc(sizeof(int) + n); + struct tigerstr *t = malloc(TAG_SIZE + sizeof(int) + n); if (t == NULL) { fprintf(stderr, "Failed to allocate memory for `substring`.\n"); exit(1); } + /* GC needs to handle this eventually, hence we give it the 's' tag */ + t->tag = "s"; t->length = n; for (int i = 0; i < a->length; i++) diff --git a/lib/codegen.ml b/lib/codegen.ml index 770c60e..42cfe4c 100644 --- a/lib/codegen.ml +++ b/lib/codegen.ml @@ -372,7 +372,8 @@ module RiscVGen : CODEGEN = struct let generateFrag = function | Frame.PROC { body; frame } -> generateFunctionStm body frame - | Frame.STRING (lab, str) -> Frame.string lab str + | Frame.STRING (lab, str) -> Frame.string_obj lab str + | Frame.STRING_LIT (lab, str) -> Frame.string_lit lab str let%test_unit "test_codegen_test_files" = let test_dir = "../../../tests/codegen/" in diff --git a/lib/driver.ml b/lib/driver.ml index ec0ceb3..74d725f 100644 --- a/lib/driver.ml +++ b/lib/driver.ml @@ -23,7 +23,8 @@ let generateFrag f = in match f with | Frame.PROC { body; frame } -> generateFunctionStm body frame - | Frame.STRING (lab, str) -> Frame.string lab str + | Frame.STRING (lab, str) -> Frame.string_obj lab str + | Frame.STRING_LIT (lab, str) -> Frame.string_lit lab str let compile_channel ?filename chan = let absyn = Parser.parse_channel ?filename chan in diff --git a/lib/frame.ml b/lib/frame.ml index d261b3a..bf0254b 100644 --- a/lib/frame.ml +++ b/lib/frame.ml @@ -17,6 +17,7 @@ module type FRAME = sig type frag = | PROC of { body : Tree.stm; frame : frame } | STRING of Temp.label * string + | STRING_LIT of Temp.label * string val new_frame : new_frame_args -> frame val name : frame -> Temp.label @@ -76,7 +77,8 @@ module type FRAME = sig val fn_prolog_epilog_to_string : fn_prolog_epilog -> register_map:register Temp.tbl -> string - val string : Symbol.symbol -> string -> string + val string_obj : Symbol.symbol -> string -> string + val string_lit : Symbol.symbol -> string -> string end (* TODO: Implement this if we really want to target the x86 architecture *) @@ -192,6 +194,7 @@ module RiscVFrame : FRAME = struct type frag = | PROC of { body : Tree.stm; frame : frame } | STRING of Temp.label * string + | STRING_LIT of Temp.label * string let word_size = 8 @@ -445,30 +448,42 @@ module RiscVFrame : FRAME = struct in { prolog; body; epilog } - (* Instruction to generate a string with a label. *) - let string label str = + (* Instruction to generate a string literal with a label. *) + let string_lit label str = + Printf.sprintf "\t.section\t.rodata\n\t.align\t3\n%s:\n\t.string\t\"%s\"\n" + (Symbol.name label) str + + (* Instruction to generate a string object with a label. *) + let string_obj label str = let label = Symbol.name label in - let strlen = String.length str in - (* We are aligning this to 8, i.e. .align 3 (power of 2)*) + let tag_label = Temp.new_label () in + let char_list = String.to_list @@ Stdlib.Scanf.unescaped str in + let tag_code = string_lit tag_label "!s" in + let strlen = List.length char_list in + (* We are aligning this to 8, i.e. .align 3 (power of 2) *) let padding = 8 - (strlen % 8) in - (* +8 because the first 8 bytes is used for the string length *) + (* +8 because the first 8 bytes is used for the pointer to the tag *) + (* +8 because the next 8 bytes is used for the string length *) + (* `!s` is the tag that the runtime uses for strings literals, i.e. + don't need to be GC-ed. For convenience, we keep emitting the tag + literal, though this should be optimised for sure. *) let preamble = Printf.sprintf - " \t.globl\t%s\n\ - \t.data\n\ + "\t.globl\t%s\n\ + %s\t.data\n\ \t.align\t3\n\ \t.type\t%s, @object\n\ \t.size\t%s, %d\n\ %s:\n\ + \t.dword\t%s\n\ \t.dword\t%d\n" - label label label - (8 + strlen + padding) - label strlen + label tag_code label label + (8 + 8 + strlen + padding) + label (Symbol.name tag_label) strlen in let body = - String.to_list @@ Stdlib.Scanf.unescaped str - |> List.map ~f:(fun c -> - Printf.sprintf "\t.byte\t%d" @@ Stdlib.Char.code c) + List.map char_list ~f:(fun c -> + Printf.sprintf "\t.byte\t%d" @@ Stdlib.Char.code c) |> String.concat ~sep:"\n" in let postamble = diff --git a/lib/semant.ml b/lib/semant.ml index 079ee55..0ecfec6 100644 --- a/lib/semant.ml +++ b/lib/semant.ml @@ -195,8 +195,11 @@ module Semant : SEMANT = struct ~init:[] fields |> List.rev in + let is_ptr_list = + List.map ~f:(fun (_, ty) -> Types.is_ptr ty) fields + in { - exp = Translate.recordExp input_field_exps; + exp = Translate.recordExp input_field_exps is_ptr_list; ty = record_type; } | _ -> @@ -349,7 +352,9 @@ module Semant : SEMANT = struct "array initial value expected to be of type %s" (Types.to_string t) ); { - exp = Translate.arrayExp (size_expty.exp, init_expty.exp); + exp = + Translate.arrayExp + (size_expty.exp, init_expty.exp, Types.is_ptr t); ty = ty'; } | _ -> { exp = Translate.default_exp; ty = Types.INT }) diff --git a/lib/translate.ml b/lib/translate.ml index c846862..4e98ffc 100644 --- a/lib/translate.ml +++ b/lib/translate.ml @@ -102,7 +102,8 @@ module Translate = struct let alloc_local l escape = (l, Frame.alloc_local l.frame escape) let default_exp = Ex (Tree.CONST 0) let frags : Frame.frag list ref = ref [] - let str_labels : Temp.label StringMap.t ref = ref StringMap.empty + let str_obj_labels : Temp.label StringMap.t ref = ref StringMap.empty + let str_lit_labels : Temp.label StringMap.t ref = ref StringMap.empty let rec seq = function | [ a ] -> a @@ -164,31 +165,75 @@ module Translate = struct (* Create a new label and attach a string with this label to the fragment list *) let lab = - match StringMap.find_opt s !str_labels with + match StringMap.find_opt s !str_obj_labels with | Some l -> l | None -> let lab = Temp.new_label () in frags := Frame.STRING (lab, s) :: !frags; - str_labels := StringMap.add s lab !str_labels; + str_obj_labels := StringMap.add s lab !str_obj_labels; lab in Ex (T.NAME lab) + (* Generate label for a string literal, this generates an actual literal + in assembly and not a tiger string object. *) + let stringLiteral s = + match StringMap.find_opt s !str_lit_labels with + | Some l -> l + | None -> + let lab = Temp.new_label () in + frags := Frame.STRING_LIT (lab, s) :: !frags; + str_obj_labels := StringMap.add s lab !str_lit_labels; + lab + let binOpPlus e1 e2 = Tree.BINOP (Tree.PLUS, e1, e2) let binOpMul e1 e2 = Tree.BINOP (Tree.MUL, e1, e2) + (* Assume that stdlib functions do not require a static link *) + let callStdlibExp (name, args) = + Ex (Frame.externalCall (name, List.map unEx args)) + (* TODO: Emit code for bounds checking. Idea: Use the first word to store the array length and use that to carry out bounds checking. *) let subscriptVar (var_exp, index_exp) = - Ex - (T.MEM - (binOpPlus (unEx var_exp) - (binOpMul (T.CONST Frame.word_size) (unEx index_exp)))) + (* var_exp + word_size * (2 + index) as the first word is the tag + and the second one is the size of the array. *) - (* Assume that stdlib functions do not require a static link *) - let callStdlibExp (name, args) = - Ex (Frame.externalCall (name, List.map unEx args)) + (* Store the record pointer and index in a register so we don't evaluate + it twice. *) + let r1 = Temp.new_temp () in + let r2 = Temp.new_temp () in + + let fail_label = Temp.new_label () in + let gte_zero_label = Temp.new_label () in + let ok_label = Temp.new_label () in + Ex + (T.ESEQ + ( seq + [ + T.MOVE (T.TEMP r1, unEx var_exp); + T.MOVE (T.TEMP r2, unEx index_exp); + T.CJUMP (T.LT, T.TEMP r2, T.CONST 0, fail_label, gte_zero_label); + T.LABEL gte_zero_label; + T.CJUMP + ( T.GE, + T.TEMP r2, + T.MEM (binOpPlus (T.TEMP r1) (T.CONST Frame.word_size)), + fail_label, + ok_label ); + T.LABEL fail_label; + (* If we encounter a null pointer, print a message and exit *) + unNx + (callStdlibExp + ("print", [ stringExp "Array access is out of bounds\n" ])); + unNx (callStdlibExp ("exit", [ Ex (T.CONST 1) ])); + T.LABEL ok_label; + ], + T.MEM + (binOpPlus (T.TEMP r1) + (binOpMul (T.CONST Frame.word_size) + (binOpPlus (T.TEMP r2) (T.CONST 2)))) )) let fieldVar (var_exp, field_index) = (* Store the record pointer in a register so we can @@ -223,8 +268,10 @@ module Translate = struct (* TODO: We manually did the constant folding here, though it wouldn't been necessary if the compiler implemented constant folding as an optimisation. *) + (* +1 because the first slot is used for the tag *) T.MEM - (binOpPlus (T.TEMP r) (T.CONST (Frame.word_size * field_index))) )) + (binOpPlus (T.TEMP r) + (T.CONST (Frame.word_size * (field_index + 1)))) )) let assignExp (var, exp) = Nx (T.MOVE (unEx var, unEx exp)) @@ -308,10 +355,12 @@ module Translate = struct T.LABEL end_label; ]) - let recordExp fields = - let num_fields = List.length fields in - let record_size = num_fields * Frame.word_size in + let recordExp field_exps is_ptr_list = let r = Temp.new_temp () in + let tag_label = + List.map (fun e -> if e then "p" else "n") is_ptr_list + |> String.concat "" |> stringLiteral + in let _, processed_fields = List.fold_left (fun (idx, fs) field -> @@ -320,8 +369,9 @@ module Translate = struct ( T.MEM (binOpPlus (T.TEMP r) (T.CONST (idx * Frame.word_size))), unEx field ) :: fs )) - (0, []) fields + (1, []) field_exps (* Start from 1 as the first slot is for the tag *) in + Ex (T.ESEQ ( seq @@ -329,12 +379,15 @@ module Translate = struct T.MOVE ( T.TEMP r, unEx - @@ callStdlibExp ("malloc", [ Ex (T.CONST record_size) ]) ); + @@ callStdlibExp ("allocRecord", [ Ex (T.NAME tag_label) ]) + ); ] @ processed_fields), T.TEMP r )) - let arrayExp (size, init) = callStdlibExp ("initArray", [ size; init ]) + let arrayExp (size, init, is_ptr) = + let is_ptr = if is_ptr then Ex (T.CONST 1) else Ex (T.CONST 0) in + callStdlibExp ("initArray", [ size; init; is_ptr ]) let findFunctionStaticLink (fn_level, call_level) = let rec do_one_level curr_level frame_addr = @@ -425,5 +478,6 @@ module Translate = struct let init () = frags := []; - str_labels := StringMap.empty + str_obj_labels := StringMap.empty; + str_lit_labels := StringMap.empty end diff --git a/lib/types.ml b/lib/types.ml index 9c3cc0f..c414c9e 100644 --- a/lib/types.ml +++ b/lib/types.ml @@ -39,6 +39,8 @@ module Types = struct | NIL, RECORD _ -> true | RECORD _, NIL -> true | _ -> false + + let is_ptr = function RECORD _ | ARRAY _ | STRING -> true | _ -> false end open Base diff --git a/test_lib/integration.ml b/test_lib/integration.ml index adebfa6..882bd3d 100644 --- a/test_lib/integration.ml +++ b/test_lib/integration.ml @@ -46,7 +46,9 @@ let%test_unit _ = let cmd_inc, cmd_outc = Core_unix.open_process cmd in let cmd_output = In_channel.input_lines ~fix_win_eol:true cmd_inc - |> List.tl |> Stdlib.Option.get |> String.concat ~sep:"\n" |> sanitise + |> List.tl |> Stdlib.Option.get + |> List.map ~f:(fun s -> s ^ "\n") + |> String.concat ~sep:"" |> sanitise in (match Core_unix.close_process (cmd_inc, cmd_outc) with | Ok _ -> diff --git a/tests/integration/expected/test-array-of-records.output b/tests/integration/expected/test-array-of-records.output index c76a964..4b51ddb 100644 --- a/tests/integration/expected/test-array-of-records.output +++ b/tests/integration/expected/test-array-of-records.output @@ -1 +1 @@ -abcdefghij \ No newline at end of file +abcdefghij diff --git a/tests/integration/expected/test-closure2.output b/tests/integration/expected/test-closure2.output index 29deb3d..be2f4ac 100644 --- a/tests/integration/expected/test-closure2.output +++ b/tests/integration/expected/test-closure2.output @@ -1 +1 @@ -012345678901234567890 \ No newline at end of file +012345678901234567890 diff --git a/tests/integration/expected/test-closure3.output b/tests/integration/expected/test-closure3.output index 29deb3d..be2f4ac 100644 --- a/tests/integration/expected/test-closure3.output +++ b/tests/integration/expected/test-closure3.output @@ -1 +1 @@ -012345678901234567890 \ No newline at end of file +012345678901234567890 diff --git a/tests/integration/expected/test-fn-many-args.output b/tests/integration/expected/test-fn-many-args.output index c76a964..4b51ddb 100644 --- a/tests/integration/expected/test-fn-many-args.output +++ b/tests/integration/expected/test-fn-many-args.output @@ -1 +1 @@ -abcdefghij \ No newline at end of file +abcdefghij diff --git a/tests/integration/expected/test-for-loop-break.output b/tests/integration/expected/test-for-loop-break.output index ad47100..11f11f9 100644 --- a/tests/integration/expected/test-for-loop-break.output +++ b/tests/integration/expected/test-for-loop-break.output @@ -1 +1 @@ -0123456789 \ No newline at end of file +0123456789 diff --git a/tests/integration/expected/test-string-array.output b/tests/integration/expected/test-string-array.output index c76a964..4b51ddb 100644 --- a/tests/integration/expected/test-string-array.output +++ b/tests/integration/expected/test-string-array.output @@ -1 +1 @@ -abcdefghij \ No newline at end of file +abcdefghij diff --git a/tests/integration/expected/test-while-loop-break.output b/tests/integration/expected/test-while-loop-break.output index ad47100..11f11f9 100644 --- a/tests/integration/expected/test-while-loop-break.output +++ b/tests/integration/expected/test-while-loop-break.output @@ -1 +1 @@ -0123456789 \ No newline at end of file +0123456789 diff --git a/tests/integration/expected/test-while-loop.output b/tests/integration/expected/test-while-loop.output index ad47100..11f11f9 100644 --- a/tests/integration/expected/test-while-loop.output +++ b/tests/integration/expected/test-while-loop.output @@ -1 +1 @@ -0123456789 \ No newline at end of file +0123456789