diff --git a/.gitignore b/.gitignore index 9b365da..eced76f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ _opam .merlin *.swp *.so -*.o \ No newline at end of file +*.o +*.exe +*.out \ No newline at end of file diff --git a/Makefile b/Makefile index f47f291..567e3fd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,23 @@ -.PHONY : build link all run +.PHONY : build-14 link-14 all-14 run-14 build link all run + +# Check `README.md` for pre-requisite + +# These are for LLVM < 15 and OCaml < 5 + +build-14 : + dune build bin/kaleidoscope_14.exe + +link-14 : + rm -f k_14.exe + ln -s _build/default/bin/kaleidoscope_14.exe k_14.exe + +run-14 : + LD_LIBRARY_PATH=_build/default/stubs ./k_14.exe + +demo-14: + dune exec bin/kaleidoscope_14.exe < example/mandel.ks + +# These are for LLVM >= 15 and OCaml >= 5 build : dune build bin/kaleidoscope.exe @@ -7,14 +26,20 @@ link : rm -f k.exe ln -s _build/default/bin/kaleidoscope.exe k.exe -all: build link - run : LD_LIBRARY_PATH=_build/default/stubs ./k.exe +demo: + dune exec bin/kaleidoscope.exe < example/mandel.ks + +# Not covering yet + +test: + dune test lib + ch8-main: clang++ main.cpp output.o -o main ch9-clang: - LD_LIBRARY_PATH=_build/default/stubs ./k.exe < fib.ks 2>&1 | clang -x ir - + LD_LIBRARY_PATH=_build/default/stubs ./k_14.exe < example/fib.ks 2>&1 | clang -x ir - diff --git a/README.md b/README.md index fa6be35..ccb78ab 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,20 @@ # llvm-ocaml-tutorial -The tutorial is tested up to OCaml 4.14.1 and LLVM 14.0.6. +## Introduction + +The tutorial is Kaleidoscope on OCaml. It's not intended as a fully translation of [LLVM Kaleidoscope tutorial](https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/index.html), but a demonstration of basic LLVM APIs in OCaml. + +The tutorial is tested with combinations of + +1. opam `llvm.16.0.6+nnp` and OCaml 5.1.0 (should be released soon). +2. opam `llvm.14.0.6` and OCaml 4.14.1. + +In opam packages, `llvm.16.0.6+nnp` uses _Opaque Pointers_ and `llvm.14.0.6` uses _Typed pointers_. In official LLVM, _Opaque Pointers_ are enabled by default from LLVM 15 and _Typed pointers_ are not supported from LLVM 17. See [LLVM Opaque Pointers](https://llvm.org/docs/OpaquePointers.html) for details. ## Build +Since the changes in LLVM is breakable but tiny in their APIs until now, for better or for worse, there is some duplicate code in the repo. Combination 1 uses `bin/kaleidoscope.ml` and `lib`. Combination 2 uses `bin/kaleidoscope.ml_14.ml` and `lib-14`. The rest are shared. + ```console # Install the dependencies opam install base ctypes-foreign llvm menhir ppx_jane @@ -15,21 +26,94 @@ dune build stubs/libbindings.so export LD_LIBRARY_PATH=_build/default/stubs ``` -## Run +## Run (Hurry) ```console +# with llvm 14.0.6 +dune exec bin/kaleidoscope_14.exe < example/mandel.kal +``` + +```console +# with llvm 16.0.6+nnp or beyond dune exec bin/kaleidoscope.exe < example/mandel.kal ``` -## Test +## Run (Step-wise) + +See `Makefile`. ```console -dune test +# with llvm 14.0.6 +make build-14 +make link-14 +make demo-14 +make run-14 ``` +```console +# with llvm 16.0.6+nnp or beyond +make build +make link +make demo +make run +``` ## Meta +Currently, the difference between two version of code is tiny. + +The result of `diff bin/kaleidoscope.ml bin/kaleidoscope_14` +```diff +7c7 +< Kaleidoscope_lib.Toplevel.main !dest +--- +> Kaleidoscope_lib_14.Toplevel.main !dest +``` + +The result of `diff lib lib-14-legacy` + +```diff +diff '--color=auto' lib/codegen.ml lib-14-legacy/codegen.ml +57c57 +< | Some v -> Llvm.build_load double_type v name builder) +--- +> | Some v -> Llvm.build_load v name builder) +96,99c96 +< let fnty = +< Llvm.function_type double_type (Array.of_list [ double_type; double_type ]) +< in +< Llvm.build_call fnty callee [| lhs_val; rhs_val |] "binop" builder) +--- +> Llvm.build_call callee [| lhs_val; rhs_val |] "binop" builder) +112,114c109 +< let arg_typs = Array.map args ~f:(Fn.const double_type) in +< let fnty = Llvm.function_type double_type arg_typs in +< Llvm.build_call fnty callee args "calltmp" builder +--- +> Llvm.build_call callee args "calltmp" builder +211,212c206 +< let cur_var = Llvm.build_load double_type alloca var_name builder in +< (* let cur_var = Llvm.build_load (Llvm.type_of alloca) alloca var_name builder in *) +--- +> let cur_var = Llvm.build_load alloca var_name builder in +238,239c232 +< let fnty = Llvm.function_type double_type (Array.of_list [ double_type ]) in +< Llvm.build_call fnty callee [| operand |] "unop" builder +--- +> Llvm.build_call callee [| operand |] "unop" builder +diff '--color=auto' lib/dune lib-14-legacy/dune +2c2 +< (name kaleidoscope_lib) +--- +> (name kaleidoscope_lib_14) +``` + Thank to @Kakadu's PR. The project is originally forked from https://github.com/adamrk/llvm-ocaml-tutorial. +## Todo + +- [ ] Redo testing +- [ ] Use vanilla library instead of `Base` +- [ ] Update the missing part of the official Kaleidoscope tutorial +- [ ] Rewrite the tutorial in OCaml diff --git a/bin/dune b/bin/dune index 55db530..824c898 100644 --- a/bin/dune +++ b/bin/dune @@ -1,14 +1,27 @@ +(executable + (name kaleidoscope_14) + (modules kaleidoscope_14) + (libraries kaleidoscope_lib_14) + (link_deps + (file %{project_root}/stubs/libbindings.so)) + (link_flags -cclib -Lstubs -cclib -lbindings)) + +(cram + (deps + ./kaleidoscope_14.exe + %{project_root}/stubs/libbindings.so + %{project_root}/example/fib.ks)) + (executable (name kaleidoscope) - (libraries base kaleidoscope_lib) + (modules kaleidoscope) + (libraries kaleidoscope_lib) (link_deps (file %{project_root}/stubs/libbindings.so)) - (link_flags -cclib -Lstubs -cclib -lbindings) - (preprocess - (pps ppx_jane ppx_expect))) + (link_flags -cclib -Lstubs -cclib -lbindings)) (cram (deps ./kaleidoscope.exe %{project_root}/stubs/libbindings.so - %{project_root}/fib.ks)) + %{project_root}/example/fib.ks)) diff --git a/bin/kaleidoscope.ml b/bin/kaleidoscope.ml index e206fb6..7af20d2 100644 --- a/bin/kaleidoscope.ml +++ b/bin/kaleidoscope.ml @@ -1,11 +1,8 @@ let () = let dest = ref `Stdin in Arg.parse - [ - ( "-file", - Arg.String (fun s -> dest := `File s), - " FILE read input from file" ); - ] + [ "-file", Arg.String (fun s -> dest := `File s), " FILE read input from file" ] (fun _ -> failwith "Anonymous ones are not supported") "Parse and print kaleidoscope"; Kaleidoscope_lib.Toplevel.main !dest +;; diff --git a/bin/kaleidoscope.mli b/bin/kaleidoscope.mli deleted file mode 100644 index 999186a..0000000 --- a/bin/kaleidoscope.mli +++ /dev/null @@ -1 +0,0 @@ -(*_ mli intentionally blank for executable *) diff --git a/bin/kaleidoscope_14.ml b/bin/kaleidoscope_14.ml new file mode 100644 index 0000000..098ea10 --- /dev/null +++ b/bin/kaleidoscope_14.ml @@ -0,0 +1,8 @@ +let () = + let dest = ref `Stdin in + Arg.parse + [ "-file", Arg.String (fun s -> dest := `File s), " FILE read input from file" ] + (fun _ -> failwith "Anonymous ones are not supported") + "Parse and print kaleidoscope"; + Kaleidoscope_lib_14.Toplevel.main !dest +;; diff --git a/fib.ks b/example/fib.ks similarity index 100% rename from fib.ks rename to example/fib.ks diff --git a/example/mandel.kal b/example/mandel.ks similarity index 100% rename from example/mandel.kal rename to example/mandel.ks diff --git a/lib/ast.ml b/lang-ks/ast.ml similarity index 61% rename from lib/ast.ml rename to lang-ks/ast.ml index 3a6fa01..0c43cc7 100644 --- a/lib/ast.ml +++ b/lang-ks/ast.ml @@ -69,39 +69,37 @@ module Expr = struct * into a single term and then recurse until there is one term. *) let rec reduce first rest = match rest with - | (first_op, first_prec, _) :: tail -> ( - let index = - (* search for the index of the operator with highest precedence *) - List.foldi tail ~init:(first_op, first_prec, 0) - ~f:(fun - new_inx - (highest_op, highest_prec, inx) - (new_op, new_prec, _new_expr) - -> - if Int.( > ) new_prec highest_prec then - (new_op, new_prec, new_inx + 1) - else (highest_op, highest_prec, inx)) - |> fun (_, _, index) -> index - in - match index with - (* if the first operator has precedence, combine [first] and [rest[0]] - * into new [first] and set [rest] to [tail rest]. *) - | 0 -> - let to_reduce = List.hd_exn rest in - let expr = Bin_list (first, [ to_reduce ]) in - reduce expr (List.tl_exn rest) - (* if it's index n > 0 then combine the terms at index [n] and [n-1] - * into the new [rest]. *) - | n -> - let to_reduce = List.nth_exn rest n in - let prev_op, prev_prec, prev_expr = List.nth_exn rest (n - 1) in - let new_expr = - (prev_op, prev_prec, Bin_list (prev_expr, [ to_reduce ])) - in - reduce first - (List.take rest (n - 1) @ (new_expr :: List.drop rest (n + 1)))) + | (first_op, first_prec, _) :: tail -> + let index = + (* search for the index of the operator with highest precedence *) + List.foldi + tail + ~init:(first_op, first_prec, 0) + ~f: + (fun + new_inx (highest_op, highest_prec, inx) (new_op, new_prec, _new_expr) -> + if Int.( > ) new_prec highest_prec + then new_op, new_prec, new_inx + 1 + else highest_op, highest_prec, inx) + |> fun (_, _, index) -> index + in + (match index with + (* if the first operator has precedence, combine [first] and [rest[0]] + * into new [first] and set [rest] to [tail rest]. *) + | 0 -> + let to_reduce = List.hd_exn rest in + let expr = Bin_list (first, [ to_reduce ]) in + reduce expr (List.tl_exn rest) + (* if it's index n > 0 then combine the terms at index [n] and [n-1] + * into the new [rest]. *) + | n -> + let to_reduce = List.nth_exn rest n in + let prev_op, prev_prec, prev_expr = List.nth_exn rest (n - 1) in + let new_expr = prev_op, prev_prec, Bin_list (prev_expr, [ to_reduce ]) in + reduce first (List.take rest (n - 1) @ (new_expr :: List.drop rest (n + 1)))) (* once there's only one term left we're done *) | [] -> first + ;; end type t = @@ -128,50 +126,48 @@ module Expr = struct | No_binop.Variable x -> Variable x | No_binop.Call (f, args) -> Call (f, List.map args ~f:of_no_binop) | No_binop.If (if_, then_, else_) -> - If (of_no_binop if_, of_no_binop then_, of_no_binop else_) + If (of_no_binop if_, of_no_binop then_, of_no_binop else_) | No_binop.For (id, start, end_, step, body) -> - For - ( id, - of_no_binop start, - of_no_binop end_, - Option.map step ~f:of_no_binop, - of_no_binop body ) + For + ( id + , of_no_binop start + , of_no_binop end_ + , Option.map step ~f:of_no_binop + , of_no_binop body ) | No_binop.Unary (c, t) -> Unary (c, of_no_binop t) | No_binop.Var (vars, body) -> - Var - ( List.map vars ~f:(fun (name, expr) -> - (name, Option.map expr ~f:of_no_binop)), - of_no_binop body ) + Var + ( List.map vars ~f:(fun (name, expr) -> name, Option.map expr ~f:of_no_binop) + , of_no_binop body ) | No_binop.Bin_list (first, []) -> of_no_binop first | No_binop.Bin_list (first, [ (op, _prec, second) ]) -> - Binary (op, of_no_binop first, of_no_binop second) - | No_binop.Bin_list (first, rest) -> - of_no_binop (No_binop.reduce first rest) + Binary (op, of_no_binop first, of_no_binop second) + | No_binop.Bin_list (first, rest) -> of_no_binop (No_binop.reduce first rest) + ;; let%expect_test _ = let no_binop = No_binop.Bin_list - ( No_binop.Variable "x", - [ ('*', 40, No_binop.Variable "y"); ('+', 20, No_binop.Variable "z") ] - ) + ( No_binop.Variable "x" + , [ '*', 40, No_binop.Variable "y"; '+', 20, No_binop.Variable "z" ] ) in - Caml.Format.printf !"%{sexp: t}" (of_no_binop no_binop); + Stdlib.Format.printf !"%{sexp: t}" (of_no_binop no_binop); [%expect {| (Binary + (Binary * (Variable x) (Variable y)) (Variable z)) |}]; let no_binop = No_binop.Bin_list - ( No_binop.Variable "x", - [ - ('*', 40, No_binop.Variable "y"); - ('+', 20, No_binop.Variable "z"); - ('*', 40, No_binop.Variable "w"); + ( No_binop.Variable "x" + , [ '*', 40, No_binop.Variable "y" + ; '+', 20, No_binop.Variable "z" + ; '*', 40, No_binop.Variable "w" ] ) in - Caml.Format.printf !"%{sexp: t}" (of_no_binop no_binop); + Stdlib.Format.printf !"%{sexp: t}" (of_no_binop no_binop); [%expect {| (Binary + (Binary * (Variable x) (Variable y)) (Binary * (Variable z) (Variable w))) |}] + ;; end (* func - This type represents a function definition itself. *) @@ -179,15 +175,16 @@ type func = Function of proto * Expr.t [@@deriving sexp] let func_of_no_binop_func (Expr.No_binop.Function (proto, body)) = Function (proto, Expr.of_no_binop body) +;; let set_func_name name (Function (proto, body)) = let new_proto = match proto with | Prototype ((_name : string), args) -> Prototype (name, args) - | BinOpPrototype ((_name : string), args, prec) -> - BinOpPrototype (name, args, prec) + | BinOpPrototype ((_name : string), args, prec) -> BinOpPrototype (name, args, prec) in Function (new_proto, body) +;; (* this holds the precedence for each binary operator that is defined. It can * be mutated if new binops are defined *) diff --git a/lib/ast.mli b/lang-ks/ast.mli similarity index 100% rename from lib/ast.mli rename to lang-ks/ast.mli diff --git a/lang-ks/dune b/lang-ks/dune new file mode 100644 index 0000000..c4b8eb6 --- /dev/null +++ b/lang-ks/dune @@ -0,0 +1,12 @@ +(library + (name lang_ks) + (libraries base menhirLib) + (preprocess + (pps ppx_jane ppx_expect ppx_let)) + (inline_tests)) + +(ocamllex lexer) + +(menhir + (flags --external-tokens Ast --explain --table) + (modules parser)) diff --git a/lib/lexer.mll b/lang-ks/lexer.mll similarity index 100% rename from lib/lexer.mll rename to lang-ks/lexer.mll diff --git a/lib/parse_tests.ml b/lang-ks/parse_tests.ml similarity index 95% rename from lib/parse_tests.ml rename to lang-ks/parse_tests.ml index b2ce36e..620969a 100644 --- a/lib/parse_tests.ml +++ b/lang-ks/parse_tests.ml @@ -1,11 +1,11 @@ open Base -open Caml.Format let print_parsed s = - printf - !"%{sexp: [`Expr of Ast.Expr.No_binop.func | `Extern of Ast.proto | `Def \ - of Ast.Expr.No_binop.func | `Eof ]}" + Stdlib.Format.printf + !"%{sexp: [`Expr of Ast.Expr.No_binop.func | `Extern of Ast.proto | `Def of \ + Ast.Expr.No_binop.func | `Eof ]}" (Parser.toplevel Lexer.read (Lexing.from_string s)) +;; let%expect_test _ = print_parsed "LHS < RHS;"; @@ -66,3 +66,4 @@ let%expect_test _ = [%expect {| (Expr (Function (Prototype "" ()) (Call foo ((Number 2))))) |}]; print_parsed "extern sin(x);"; [%expect {| (Extern (Prototype sin (x))) |}] +;; diff --git a/lib/parser.mly b/lang-ks/parser.mly similarity index 100% rename from lib/parser.mly rename to lang-ks/parser.mly diff --git a/lib-14/codegen.ml b/lib-14/codegen.ml new file mode 100644 index 0000000..f0d434a --- /dev/null +++ b/lib-14/codegen.ml @@ -0,0 +1,322 @@ +open Base +open Lang_ks + +let context = Llvm.global_context () +let the_module = Llvm.create_module context "main" +let builder = Llvm.builder context +let named_values : (string, Llvm.llvalue) Hashtbl.t = Hashtbl.create (module String) +let double_type = Llvm.double_type context + +(* Create an alloca instruction in the entry block of the function. This + * is used for mutable variables etc. *) +let create_entry_block_alloca the_function var_name = + let builder = + Llvm.builder_at context (Llvm.instr_begin (Llvm.entry_block the_function)) + in + Llvm.build_alloca double_type var_name builder +;; + +let rec codegen_expr = function + | Ast.Expr.Var (var_names, body) -> + let old_bindings = ref [] in + let the_function = Llvm.block_parent (Llvm.insertion_block builder) in + (* Register all variables and emit their initializer. *) + List.iter var_names ~f:(fun (var_name, init) -> + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. *) + | None -> Llvm.const_float double_type 0.0 + in + let alloca = create_entry_block_alloca the_function var_name in + Llvm.build_store init_val alloca builder |> ignore; + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + (match Hashtbl.find named_values var_name with + | None -> () + | Some old_value -> old_bindings := (var_name, old_value) :: !old_bindings); + (* Remember this binding. *) + Hashtbl.set named_values ~key:var_name ~data:alloca); + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + (* Pop all our variables from scope. *) + List.iter !old_bindings ~f:(fun (var_name, old_value) -> + Hashtbl.set named_values ~key:var_name ~data:old_value); + (* Return the body computation. *) + body_val + | Ast.Expr.Number n -> Llvm.const_float double_type n + | Ast.Expr.Variable name -> + (match Hashtbl.find named_values name with + | None -> raise_s [%message "unkown variable name" (name : string)] + (* Load the value *) + | Some v -> Llvm.build_load v name builder) + | Ast.Expr.Binary ('=', lhs, rhs) -> + (* Special case '=' because we don't want to emit the LHS as an + * expression. *) + let name = + match lhs with + | Ast.Expr.Variable name -> name + | _ -> raise_s [%message "destination of '=' must be a variable"] + in + (* Codegen the rhs. *) + let val_ = codegen_expr rhs in + (* Lookup the name. *) + let variable = + match Hashtbl.find named_values name with + | None -> raise_s [%message "unknown variable name" (name : string)] + | Some var -> var + in + Llvm.build_store val_ variable builder |> ignore; + val_ + | Ast.Expr.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + (match op with + | '+' -> Llvm.build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> Llvm.build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> Llvm.build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + let i = Llvm.build_fcmp Llvm.Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + (* Convert bool 0/1 to double 0.0 or 1.0 *) + Llvm.build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ String.make 1 op in + let callee = + match Llvm.lookup_function callee the_module with + | Some callee -> callee + | None -> raise_s [%message "unrecognized binop" (op : char)] + in + Llvm.build_call callee [| lhs_val; rhs_val |] "binop" builder) + | Ast.Expr.Call (callee_name, args) -> + (* Look up the name in the module table. *) + let callee = + match Llvm.lookup_function callee_name the_module with + | Some callee -> callee + | None -> raise_s [%message "undefined function" (callee_name : string)] + in + (* If argument mismatch error. *) + if Int.( = ) (Array.length (Llvm.params callee)) (List.length args) + then () + else raise_s [%message "incorrect number of arguments" (callee_name : string)]; + let args = Array.map (Array.of_list args) ~f:codegen_expr in + Llvm.build_call callee args "calltmp" builder + | Ast.Expr.If (condition, then_, else_) -> + let cond = codegen_expr condition in + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = Llvm.const_float double_type 0.0 in + let cond_val = Llvm.build_fcmp Llvm.Fcmp.One cond zero "ifcond" builder in + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = Llvm.insertion_block builder in + let the_function = Llvm.block_parent start_bb in + let then_bb = Llvm.append_block context "then" the_function in + (* Emit 'then' value. *) + Llvm.position_at_end then_bb builder; + let then_val = codegen_expr then_ in + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = Llvm.insertion_block builder in + (* Emit 'else' value. *) + let else_bb = Llvm.append_block context "else" the_function in + Llvm.position_at_end else_bb builder; + let else_val = codegen_expr else_ in + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = Llvm.insertion_block builder in + (* Emit merge block. *) + let merge_bb = Llvm.append_block context "ifcont" the_function in + Llvm.position_at_end merge_bb builder; + let incoming = [ then_val, new_then_bb; else_val, new_else_bb ] in + let phi = Llvm.build_phi incoming "iftmp" builder in + (* Return to the start block to add the conditional branch. *) + Llvm.position_at_end start_bb builder; + Llvm.build_cond_br cond_val then_bb else_bb builder |> ignore; + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + Llvm.position_at_end new_then_bb builder; + Llvm.build_br merge_bb builder |> ignore; + Llvm.position_at_end new_else_bb builder; + Llvm.build_br merge_bb builder |> ignore; + (* Finally, set the builder to the end of the merge block. *) + Llvm.position_at_end merge_bb builder; + phi + | Ast.Expr.For (var_name, start, end_, step, body) -> + (* Output this as: + * var = alloca double + * ... + * start = startexpr + * store start -> var + * goto loop + * loop: + * ... + * bodyexpr + * ... + * loopend: + * step = stepexpr + * endcond = endexpr + * + * curvar = load var + * nextvar = curvar + step + * store nextvar -> var + * br endcond, loop, endloop + * outloop: *) + let the_function = Llvm.block_parent (Llvm.insertion_block builder) in + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + (* Store the value into the alloca. *) + Llvm.build_store start_val alloca builder |> ignore; + (* Make the new basic block for the loop header, inserting after current + * block. *) + let loop_bb = Llvm.append_block context "loop" the_function in + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + Llvm.build_br loop_bb builder |> ignore; + (* Start insertion in loop_bb. *) + Llvm.position_at_end loop_bb builder; + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = Hashtbl.find named_values var_name in + Hashtbl.set named_values ~key:var_name ~data:alloca; + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + codegen_expr body |> ignore; + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> Llvm.const_float double_type 1.0 + in + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = Llvm.build_load alloca var_name builder in + let next_var = Llvm.build_fadd cur_var step_val "nextvar" builder in + Llvm.build_store next_var alloca builder |> ignore; + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = Llvm.const_float double_type 0.0 in + let end_cond = Llvm.build_fcmp Llvm.Fcmp.One end_cond zero "loopcond" builder in + (* Create the "after loop" block and insert it. *) + let after_bb = Llvm.append_block context "afterloop" the_function in + (* Insert the conditional branch into the end of loop_end_bb. *) + Llvm.build_cond_br end_cond loop_bb after_bb builder |> ignore; + (* Any new code will be inserted in after_bb. *) + Llvm.position_at_end after_bb builder; + (* Restore the unshadowed variable. *) + (match old_val with + | Some old_val -> Hashtbl.set named_values ~key:var_name ~data:old_val + | None -> ()); + (* for expr always returns 0.0. *) + Llvm.const_null double_type + | Ast.Expr.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ String.make 1 op in + let callee = + match Llvm.lookup_function callee the_module with + | Some callee -> callee + | None -> raise_s [%message "unknown unary operator" (op : char)] + in + Llvm.build_call callee [| operand |] "unop" builder +;; + +let codegen_proto_existing = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + Hashtbl.clear named_values; + let doubles = Array.create ~len:(List.length args) double_type in + let ft = Llvm.function_type double_type doubles in + let f, existing = + match Llvm.lookup_function name the_module with + | None -> Llvm.declare_function name ft the_module, `Existing + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if Int.(Array.length (Llvm.basic_blocks f) = 0) + then () + else raise_s [%message "redefinition of function" (name : string)]; + (* If 'f' took a different number of arguments, reject. *) + if Int.(Array.length (Llvm.params f) = List.length args) + then () + else + raise_s + [%message + "redefinition of function with a different number of args" (name : string)]; + f, `Not_existing + in + (* Set names for all arguments. *) + Array.iteri (Llvm.params f) ~f:(fun i a -> + let name = List.nth_exn args i in + Llvm.set_value_name name a; + Hashtbl.add_exn named_values ~key:name ~data:a); + f, existing +;; + +(* Create an alloca for each argument and register the argument in the symbol + * table so that references to it will succeed. *) +let create_argument_allocas the_function proto = + let args = + match proto with + | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args + in + Array.iteri (Llvm.params the_function) ~f:(fun i ai -> + let var_name = List.nth_exn args i in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + (* Store the initial value into the alloca. *) + Llvm.build_store ai alloca builder |> ignore; + (* Add arguments to variable symbol table. *) + Hashtbl.set named_values ~key:var_name ~data:alloca) +;; + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + let the_function, existing = codegen_proto_existing proto in + (* If this is an operator, install it. *) + (match proto with + | Ast.BinOpPrototype (name, _args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add_exn Ast.binop_precedence ~key:op ~data:prec + | _ -> ()); + (* Create a new basic block to start insertion into. *) + let bb = Llvm.append_block context "entry" the_function in + Llvm.position_at_end bb builder; + (try + (* Add all arguments to the symbol table and create their allocas. *) + create_argument_allocas the_function proto; + let return_val = codegen_expr body in + (* Finish off the function. *) + let (_ : Llvm.llvalue) = Llvm.build_ret return_val builder in + (* Validate the generated code, checking for consistency. *) + (match Llvm_analysis.verify_function the_function with + | true -> () + | false -> + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_function); + Llvm_analysis.assert_valid_function the_function); + (* Optimize the function. *) + let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in + the_function + with + | e -> + (match existing with + | `Not_existing -> Llvm.delete_function the_function + | `Existing -> Array.iter (Llvm.basic_blocks the_function) ~f:Llvm.delete_block); + raise e) +;; + +let codegen_proto proto = codegen_proto_existing proto |> fst diff --git a/lib-14/codegen.mli b/lib-14/codegen.mli new file mode 100644 index 0000000..73a273f --- /dev/null +++ b/lib-14/codegen.mli @@ -0,0 +1,5 @@ +open Lang_ks + +val the_module : Llvm.llmodule +val codegen_proto : Ast.proto -> Llvm.llvalue +val codegen_func : [ `Function ] Llvm.PassManager.t -> Ast.func -> Llvm.llvalue diff --git a/lib-14/dune b/lib-14/dune new file mode 100644 index 0000000..3b95197 --- /dev/null +++ b/lib-14/dune @@ -0,0 +1,16 @@ +(library + (name kaleidoscope_lib_14) + (libraries + ctypes.foreign + llvm + llvm.analysis + llvm.executionengine + llvm.target + llvm.scalar_opts + llvm.all_backends + stdio + menhirLib + lang_ks) + (preprocess + (pps ppx_jane ppx_expect ppx_let)) + (inline_tests)) diff --git a/lib-14/kaleidoscope_lib.ml b/lib-14/kaleidoscope_lib.ml new file mode 100644 index 0000000..162c209 --- /dev/null +++ b/lib-14/kaleidoscope_lib.ml @@ -0,0 +1,4 @@ +module Codegen = Codegen +module Lexer = Lang_ks.Lexer +module Parser = Lang_ks.Parser +module Toplevel = Toplevel diff --git a/lib-14/toplevel.ml b/lib-14/toplevel.ml new file mode 100644 index 0000000..b4ff06c --- /dev/null +++ b/lib-14/toplevel.ml @@ -0,0 +1,127 @@ +open Base +open Lang_ks + +let dump_to_object ~the_fpm = + Llvm_all_backends.initialize (); + (* "x86_64-pc-linux-gnu" *) + let target_triple = Llvm_target.Target.default_triple () in + let target = Llvm_target.Target.by_triple target_triple in + let cpu = "generic" in + let reloc_mode = Llvm_target.RelocMode.Default in + let machine = + Llvm_target.TargetMachine.create ~triple:target_triple ~cpu ~reloc_mode target + in + let data_layout = + Llvm_target.TargetMachine.data_layout machine |> Llvm_target.DataLayout.as_string + in + Llvm.set_target_triple target_triple Codegen.the_module; + Llvm.set_data_layout data_layout Codegen.the_module; + let filename = "output.o" in + Llvm_target.TargetMachine.add_analysis_passes the_fpm machine; + let file_type = Llvm_target.CodeGenFileType.ObjectFile in + Llvm_target.TargetMachine.emit_to_file Codegen.the_module file_type filename machine; + (* printf "Wrote %s\n" filename; *) + () +;; + +let run_main in_channel ~the_fpm ~the_execution_engine = + let anonymous_func_count = ref 0 in + let supplier = + Parser.MenhirInterpreter.lexer_lexbuf_to_supplier + Lexer.read + (Lexing.from_channel in_channel) + in + let rec run_loop the_fpm the_execution_engine supplier = + let incremental = Parser.Incremental.toplevel Lexing.dummy_pos in + (* printf "\n" ; + printf "ready> " ; *) + Out_channel.flush Stdio.stdout; + (try + match Parser.MenhirInterpreter.loop supplier incremental with + | `Expr ast -> + (* printf "parsed a toplevel expression" ; *) + (* Evaluate a top-level expression into an anonymous function. *) + let func = Ast.func_of_no_binop_func ast in + Out_channel.flush Stdio.stdout; + Llvm_executionengine.add_module Codegen.the_module the_execution_engine; + anonymous_func_count := !anonymous_func_count + 1; + let _tmp_name = Printf.sprintf "__toplevel%d" !anonymous_func_count in + let tmp_func = Ast.set_func_name "main" func in + let _the_function = Codegen.codegen_func the_fpm tmp_func in + () + (* Llvm.dump_value the_function ; + (* JIT the function, returning a function pointer. *) + let fp = + Llvm_executionengine.get_function_address tmp_name + (Foreign.funptr Ctypes.(void @-> returning double)) + the_execution_engine + in + printf "Evaluated to %f" (fp ()) ; + Llvm_executionengine.remove_module Codegen.the_module + the_execution_engine *) + | `Extern ext -> + (* printf "parsed an extern" ; + printf !"%{sexp: Ast.proto}\n" ext; *) + Out_channel.flush Stdio.stdout; + let _code = Codegen.codegen_proto ext in + () + (* Llvm.dump_value (Codegen.codegen_proto ext) *) + | `Def def -> + (* printf "parsed a definition" ; *) + let func = Ast.func_of_no_binop_func def in + (* printf !"%{sexp: Ast.func}\n" func; *) + Out_channel.flush Stdio.stdout; + let _code = Codegen.codegen_func the_fpm func in + () + (* Llvm.dump_value code *) + | `Eof -> + (* printf "\n\n" ; + printf "reached eof\n" ; *) + (* printf "module dump:\n" ; *) + Out_channel.flush Out_channel.stdout; + (* Add ch8 *) + dump_to_object ~the_fpm; + (* Print out all the generated code. *) + Llvm.dump_module Codegen.the_module; + Stdlib.exit 0 + with + | e -> + (* Skip expression for error recovery. *) + Stdlib.Printf.printf !"\nencountered an error %{sexp: exn}" e); + Out_channel.flush Out_channel.stdout; + run_loop the_fpm the_execution_engine supplier + in + run_loop the_fpm the_execution_engine supplier +;; + +let main input = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add_exn Ast.binop_precedence ~key:'=' ~data:2; + Hashtbl.add_exn Ast.binop_precedence ~key:'<' ~data:10; + Hashtbl.add_exn Ast.binop_precedence ~key:'+' ~data:20; + Hashtbl.add_exn Ast.binop_precedence ~key:'-' ~data:20; + Hashtbl.add_exn Ast.binop_precedence ~key:'*' ~data:40; + (* Create the JIT *) + let the_execution_engine = + (match Llvm_executionengine.initialize () with + | true -> () + | false -> raise_s [%message "failed to initialize"]); + Llvm_executionengine.create Codegen.the_module + in + let the_fpm = Llvm.PassManager.create_function Codegen.the_module in + (* Promote allocas to registers. *) + Llvm_scalar_opts.add_memory_to_register_promotion the_fpm; + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + Llvm_scalar_opts.add_instruction_combination the_fpm; + (* reassociate expressions. *) + Llvm_scalar_opts.add_reassociation the_fpm; + (* Eliminate Common SubExpressions. *) + Llvm_scalar_opts.add_gvn the_fpm; + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + Llvm_scalar_opts.add_cfg_simplification the_fpm; + Llvm.PassManager.initialize the_fpm |> ignore; + match input with + | `Stdin -> run_main ~the_execution_engine ~the_fpm In_channel.stdin + | `File file -> In_channel.with_open_text file (run_main ~the_execution_engine ~the_fpm) +;; diff --git a/lib-14/toplevel.mli b/lib-14/toplevel.mli new file mode 100644 index 0000000..acc70b3 --- /dev/null +++ b/lib-14/toplevel.mli @@ -0,0 +1 @@ +val main : [`Stdin | `File of string] -> unit diff --git a/lib/codegen.ml b/lib/codegen.ml index ba31ec2..f1855e7 100644 --- a/lib/codegen.ml +++ b/lib/codegen.ml @@ -1,12 +1,10 @@ open Base +open Lang_ks let context = Llvm.global_context () let the_module = Llvm.create_module context "main" let builder = Llvm.builder context - -let named_values : (string, Llvm.llvalue) Hashtbl.t = - Hashtbl.create (module String) - +let named_values : (string, Llvm.llvalue) Hashtbl.t = Hashtbl.create (module String) let double_type = Llvm.double_type context (* Create an alloca instruction in the entry block of the function. This @@ -16,259 +14,263 @@ let create_entry_block_alloca the_function var_name = Llvm.builder_at context (Llvm.instr_begin (Llvm.entry_block the_function)) in Llvm.build_alloca double_type var_name builder +;; let rec codegen_expr = function | Ast.Expr.Var (var_names, body) -> - let old_bindings = ref [] in - let the_function = Llvm.block_parent (Llvm.insertion_block builder) in - (* Register all variables and emit their initializer. *) - List.iter var_names ~f:(fun (var_name, init) -> - (* Emit the initializer before adding the variable to scope, this - * prevents the initializer from referencing the variable itself, and - * permits stuff like this: - * var a = 1 in - * var a = a in ... # refers to outer 'a'. *) - let init_val = - match init with - | Some init -> codegen_expr init - (* If not specified, use 0.0. *) - | None -> Llvm.const_float double_type 0.0 - in - let alloca = create_entry_block_alloca the_function var_name in - Llvm.build_store init_val alloca builder |> ignore; - (* Remember the old variable binding so that we can restore the binding - * when we unrecurse. *) - (match Hashtbl.find named_values var_name with - | None -> () - | Some old_value -> - old_bindings := (var_name, old_value) :: !old_bindings); - (* Remember this binding. *) - Hashtbl.set named_values ~key:var_name ~data:alloca); - (* Codegen the body, now that all vars are in scope. *) - let body_val = codegen_expr body in - (* Pop all our variables from scope. *) - List.iter !old_bindings ~f:(fun (var_name, old_value) -> - Hashtbl.set named_values ~key:var_name ~data:old_value); - (* Return the body computation. *) - body_val + let old_bindings = ref [] in + let the_function = Llvm.block_parent (Llvm.insertion_block builder) in + (* Register all variables and emit their initializer. *) + List.iter var_names ~f:(fun (var_name, init) -> + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. *) + | None -> Llvm.const_float double_type 0.0 + in + let alloca = create_entry_block_alloca the_function var_name in + Llvm.build_store init_val alloca builder |> ignore; + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + (match Hashtbl.find named_values var_name with + | None -> () + | Some old_value -> old_bindings := (var_name, old_value) :: !old_bindings); + (* Remember this binding. *) + Hashtbl.set named_values ~key:var_name ~data:alloca); + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + (* Pop all our variables from scope. *) + List.iter !old_bindings ~f:(fun (var_name, old_value) -> + Hashtbl.set named_values ~key:var_name ~data:old_value); + (* Return the body computation. *) + body_val | Ast.Expr.Number n -> Llvm.const_float double_type n - | Ast.Expr.Variable name -> ( - match Hashtbl.find named_values name with - | None -> raise_s [%message "unkown variable name" (name : string)] - (* Load the value *) - | Some v -> Llvm.build_load v name builder) + | Ast.Expr.Variable name -> + (match Hashtbl.find named_values name with + | None -> raise_s [%message "unkown variable name" (name : string)] + (* Load the value *) + | Some v -> Llvm.build_load double_type v name builder) | Ast.Expr.Binary ('=', lhs, rhs) -> - (* Special case '=' because we don't want to emit the LHS as an - * expression. *) - let name = - match lhs with - | Ast.Expr.Variable name -> name - | _ -> raise_s [%message "destination of '=' must be a variable"] - in - (* Codegen the rhs. *) - let val_ = codegen_expr rhs in - (* Lookup the name. *) - let variable = - match Hashtbl.find named_values name with - | None -> raise_s [%message "unknown variable name" (name : string)] - | Some var -> var - in - Llvm.build_store val_ variable builder |> ignore; - val_ - | Ast.Expr.Binary (op, lhs, rhs) -> ( - let lhs_val = codegen_expr lhs in - let rhs_val = codegen_expr rhs in - match op with - | '+' -> Llvm.build_fadd lhs_val rhs_val "addtmp" builder - | '-' -> Llvm.build_fsub lhs_val rhs_val "subtmp" builder - | '*' -> Llvm.build_fmul lhs_val rhs_val "multmp" builder - | '<' -> - let i = - Llvm.build_fcmp Llvm.Fcmp.Ult lhs_val rhs_val "cmptmp" builder - in - (* Convert bool 0/1 to double 0.0 or 1.0 *) - Llvm.build_uitofp i double_type "booltmp" builder - | _ -> - (* If it wasn't a builtin binary operator, it must be a user defined - * one. Emit a call to it. *) - let callee = "binary" ^ String.make 1 op in - let callee = - match Llvm.lookup_function callee the_module with - | Some callee -> callee - | None -> raise_s [%message "unrecognized binop" (op : char)] - in - Llvm.build_call callee [| lhs_val; rhs_val |] "binop" builder) + (* Special case '=' because we don't want to emit the LHS as an + * expression. *) + let name = + match lhs with + | Ast.Expr.Variable name -> name + | _ -> raise_s [%message "destination of '=' must be a variable"] + in + (* Codegen the rhs. *) + let val_ = codegen_expr rhs in + (* Lookup the name. *) + let variable = + match Hashtbl.find named_values name with + | None -> raise_s [%message "unknown variable name" (name : string)] + | Some var -> var + in + Llvm.build_store val_ variable builder |> ignore; + val_ + | Ast.Expr.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + (match op with + | '+' -> Llvm.build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> Llvm.build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> Llvm.build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + let i = Llvm.build_fcmp Llvm.Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + (* Convert bool 0/1 to double 0.0 or 1.0 *) + Llvm.build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ String.make 1 op in + let callee = + match Llvm.lookup_function callee the_module with + | Some callee -> callee + | None -> raise_s [%message "unrecognized binop" (op : char)] + in + let fnty = + Llvm.function_type double_type (Array.of_list [ double_type; double_type ]) + in + Llvm.build_call fnty callee [| lhs_val; rhs_val |] "binop" builder) | Ast.Expr.Call (callee_name, args) -> - (* Look up the name in the module table. *) - let callee = - match Llvm.lookup_function callee_name the_module with - | Some callee -> callee - | None -> raise_s [%message "undefined function" (callee_name : string)] - in - (* If argument mismatch error. *) - if Int.( = ) (Array.length (Llvm.params callee)) (List.length args) then - () - else - raise_s - [%message "incorrect number of arguments" (callee_name : string)]; - let args = Array.map (Array.of_list args) ~f:codegen_expr in - Llvm.build_call callee args "calltmp" builder + (* Look up the name in the module table. *) + let callee = + match Llvm.lookup_function callee_name the_module with + | Some callee -> callee + | None -> raise_s [%message "undefined function" (callee_name : string)] + in + (* If argument mismatch error. *) + if Int.( = ) (Array.length (Llvm.params callee)) (List.length args) + then () + else raise_s [%message "incorrect number of arguments" (callee_name : string)]; + let args = Array.map (Array.of_list args) ~f:codegen_expr in + let arg_typs = Array.map args ~f:(Fn.const double_type) in + let fnty = Llvm.function_type double_type arg_typs in + Llvm.build_call fnty callee args "calltmp" builder | Ast.Expr.If (condition, then_, else_) -> - let cond = codegen_expr condition in - (* Convert condition to a bool by comparing equal to 0.0 *) - let zero = Llvm.const_float double_type 0.0 in - let cond_val = Llvm.build_fcmp Llvm.Fcmp.One cond zero "ifcond" builder in - (* Grab the first block so that we might later add the conditional branch - * to it at the end of the function. *) - let start_bb = Llvm.insertion_block builder in - let the_function = Llvm.block_parent start_bb in - let then_bb = Llvm.append_block context "then" the_function in - (* Emit 'then' value. *) - Llvm.position_at_end then_bb builder; - let then_val = codegen_expr then_ in - (* Codegen of 'then' can change the current block, update then_bb for the - * phi. We create a new name because one is used for the phi node, and the - * other is used for the conditional branch. *) - let new_then_bb = Llvm.insertion_block builder in - (* Emit 'else' value. *) - let else_bb = Llvm.append_block context "else" the_function in - Llvm.position_at_end else_bb builder; - let else_val = codegen_expr else_ in - (* Codegen of 'else' can change the current block, update else_bb for the - * phi. *) - let new_else_bb = Llvm.insertion_block builder in - (* Emit merge block. *) - let merge_bb = Llvm.append_block context "ifcont" the_function in - Llvm.position_at_end merge_bb builder; - let incoming = [ (then_val, new_then_bb); (else_val, new_else_bb) ] in - let phi = Llvm.build_phi incoming "iftmp" builder in - (* Return to the start block to add the conditional branch. *) - Llvm.position_at_end start_bb builder; - Llvm.build_cond_br cond_val then_bb else_bb builder |> ignore; - (* Set a unconditional branch at the end of the 'then' block and the - * 'else' block to the 'merge' block. *) - Llvm.position_at_end new_then_bb builder; - Llvm.build_br merge_bb builder |> ignore; - Llvm.position_at_end new_else_bb builder; - Llvm.build_br merge_bb builder |> ignore; - (* Finally, set the builder to the end of the merge block. *) - Llvm.position_at_end merge_bb builder; - phi + let cond = codegen_expr condition in + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = Llvm.const_float double_type 0.0 in + let cond_val = Llvm.build_fcmp Llvm.Fcmp.One cond zero "ifcond" builder in + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = Llvm.insertion_block builder in + let the_function = Llvm.block_parent start_bb in + let then_bb = Llvm.append_block context "then" the_function in + (* Emit 'then' value. *) + Llvm.position_at_end then_bb builder; + let then_val = codegen_expr then_ in + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = Llvm.insertion_block builder in + (* Emit 'else' value. *) + let else_bb = Llvm.append_block context "else" the_function in + Llvm.position_at_end else_bb builder; + let else_val = codegen_expr else_ in + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = Llvm.insertion_block builder in + (* Emit merge block. *) + let merge_bb = Llvm.append_block context "ifcont" the_function in + Llvm.position_at_end merge_bb builder; + let incoming = [ then_val, new_then_bb; else_val, new_else_bb ] in + let phi = Llvm.build_phi incoming "iftmp" builder in + (* Return to the start block to add the conditional branch. *) + Llvm.position_at_end start_bb builder; + Llvm.build_cond_br cond_val then_bb else_bb builder |> ignore; + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + Llvm.position_at_end new_then_bb builder; + Llvm.build_br merge_bb builder |> ignore; + Llvm.position_at_end new_else_bb builder; + Llvm.build_br merge_bb builder |> ignore; + (* Finally, set the builder to the end of the merge block. *) + Llvm.position_at_end merge_bb builder; + phi | Ast.Expr.For (var_name, start, end_, step, body) -> - (* Output this as: - * var = alloca double - * ... - * start = startexpr - * store start -> var - * goto loop - * loop: - * ... - * bodyexpr - * ... - * loopend: - * step = stepexpr - * endcond = endexpr - * - * curvar = load var - * nextvar = curvar + step - * store nextvar -> var - * br endcond, loop, endloop - * outloop: *) - let the_function = Llvm.block_parent (Llvm.insertion_block builder) in - (* Create an alloca for the variable in the entry block. *) - let alloca = create_entry_block_alloca the_function var_name in - (* Emit the start code first, without 'variable' in scope. *) - let start_val = codegen_expr start in - (* Store the value into the alloca. *) - Llvm.build_store start_val alloca builder |> ignore; - (* Make the new basic block for the loop header, inserting after current - * block. *) - let loop_bb = Llvm.append_block context "loop" the_function in - (* Insert an explicit fall through from the current block to the - * loop_bb. *) - Llvm.build_br loop_bb builder |> ignore; - (* Start insertion in loop_bb. *) - Llvm.position_at_end loop_bb builder; - (* Within the loop, the variable is defined equal to the PHI node. If it - * shadows an existing variable, we have to restore it, so save it - * now. *) - let old_val = Hashtbl.find named_values var_name in - Hashtbl.set named_values ~key:var_name ~data:alloca; - (* Emit the body of the loop. This, like any other expr, can change the - * current BB. Note that we ignore the value computed by the body, but - * don't allow an error *) - codegen_expr body |> ignore; - (* Emit the step value. *) - let step_val = - match step with - | Some step -> codegen_expr step - (* If not specified, use 1.0. *) - | None -> Llvm.const_float double_type 1.0 - in - (* Compute the end condition. *) - let end_cond = codegen_expr end_ in - (* Reload, increment, and restore the alloca. This handles the case where - * the body of the loop mutates the variable. *) - let cur_var = Llvm.build_load alloca var_name builder in - let next_var = Llvm.build_fadd cur_var step_val "nextvar" builder in - Llvm.build_store next_var alloca builder |> ignore; - (* Convert condition to a bool by comparing equal to 0.0. *) - let zero = Llvm.const_float double_type 0.0 in - let end_cond = - Llvm.build_fcmp Llvm.Fcmp.One end_cond zero "loopcond" builder - in - (* Create the "after loop" block and insert it. *) - let after_bb = Llvm.append_block context "afterloop" the_function in - (* Insert the conditional branch into the end of loop_end_bb. *) - Llvm.build_cond_br end_cond loop_bb after_bb builder |> ignore; - (* Any new code will be inserted in after_bb. *) - Llvm.position_at_end after_bb builder; - (* Restore the unshadowed variable. *) - (match old_val with - | Some old_val -> Hashtbl.set named_values ~key:var_name ~data:old_val - | None -> ()); - (* for expr always returns 0.0. *) - Llvm.const_null double_type + (* Output this as: + * var = alloca double + * ... + * start = startexpr + * store start -> var + * goto loop + * loop: + * ... + * bodyexpr + * ... + * loopend: + * step = stepexpr + * endcond = endexpr + * + * curvar = load var + * nextvar = curvar + step + * store nextvar -> var + * br endcond, loop, endloop + * outloop: *) + let the_function = Llvm.block_parent (Llvm.insertion_block builder) in + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + (* Store the value into the alloca. *) + Llvm.build_store start_val alloca builder |> ignore; + (* Make the new basic block for the loop header, inserting after current + * block. *) + let loop_bb = Llvm.append_block context "loop" the_function in + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + Llvm.build_br loop_bb builder |> ignore; + (* Start insertion in loop_bb. *) + Llvm.position_at_end loop_bb builder; + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = Hashtbl.find named_values var_name in + Hashtbl.set named_values ~key:var_name ~data:alloca; + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + codegen_expr body |> ignore; + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> Llvm.const_float double_type 1.0 + in + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = Llvm.build_load double_type alloca var_name builder in + (* let cur_var = Llvm.build_load (Llvm.type_of alloca) alloca var_name builder in *) + let next_var = Llvm.build_fadd cur_var step_val "nextvar" builder in + Llvm.build_store next_var alloca builder |> ignore; + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = Llvm.const_float double_type 0.0 in + let end_cond = Llvm.build_fcmp Llvm.Fcmp.One end_cond zero "loopcond" builder in + (* Create the "after loop" block and insert it. *) + let after_bb = Llvm.append_block context "afterloop" the_function in + (* Insert the conditional branch into the end of loop_end_bb. *) + Llvm.build_cond_br end_cond loop_bb after_bb builder |> ignore; + (* Any new code will be inserted in after_bb. *) + Llvm.position_at_end after_bb builder; + (* Restore the unshadowed variable. *) + (match old_val with + | Some old_val -> Hashtbl.set named_values ~key:var_name ~data:old_val + | None -> ()); + (* for expr always returns 0.0. *) + Llvm.const_null double_type | Ast.Expr.Unary (op, operand) -> - let operand = codegen_expr operand in - let callee = "unary" ^ String.make 1 op in - let callee = - match Llvm.lookup_function callee the_module with - | Some callee -> callee - | None -> raise_s [%message "unknown unary operator" (op : char)] - in - Llvm.build_call callee [| operand |] "unop" builder + let operand = codegen_expr operand in + let callee = "unary" ^ String.make 1 op in + let callee = + match Llvm.lookup_function callee the_module with + | Some callee -> callee + | None -> raise_s [%message "unknown unary operator" (op : char)] + in + let fnty = Llvm.function_type double_type (Array.of_list [ double_type ]) in + Llvm.build_call fnty callee [| operand |] "unop" builder +;; let codegen_proto_existing = function | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> - (* Make the function type: double(double,double) etc. *) - Hashtbl.clear named_values; - let doubles = Array.create ~len:(List.length args) double_type in - let ft = Llvm.function_type double_type doubles in - let f, existing = - match Llvm.lookup_function name the_module with - | None -> (Llvm.declare_function name ft the_module, `Existing) - (* If 'f' conflicted, there was already something named 'name'. If it - * has a body, don't allow redefinition or reextern. *) - | Some f -> - (* If 'f' already has a body, reject this. *) - if Int.(Array.length (Llvm.basic_blocks f) = 0) then () - else raise_s [%message "redefinition of function" (name : string)]; - (* If 'f' took a different number of arguments, reject. *) - if Int.(Array.length (Llvm.params f) = List.length args) then () - else - raise_s - [%message - "redefinition of function with a different number of args" - (name : string)]; - (f, `Not_existing) - in - (* Set names for all arguments. *) - Array.iteri (Llvm.params f) ~f:(fun i a -> - let name = List.nth_exn args i in - Llvm.set_value_name name a; - Hashtbl.add_exn named_values ~key:name ~data:a); - (f, existing) + (* Make the function type: double(double,double) etc. *) + Hashtbl.clear named_values; + let doubles = Array.create ~len:(List.length args) double_type in + let ft = Llvm.function_type double_type doubles in + let f, existing = + match Llvm.lookup_function name the_module with + | None -> Llvm.declare_function name ft the_module, `Existing + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if Int.(Array.length (Llvm.basic_blocks f) = 0) + then () + else raise_s [%message "redefinition of function" (name : string)]; + (* If 'f' took a different number of arguments, reject. *) + if Int.(Array.length (Llvm.params f) = List.length args) + then () + else + raise_s + [%message + "redefinition of function with a different number of args" (name : string)]; + f, `Not_existing + in + (* Set names for all arguments. *) + Array.iteri (Llvm.params f) ~f:(fun i a -> + let name = List.nth_exn args i in + Llvm.set_value_name name a; + Hashtbl.add_exn named_values ~key:name ~data:a); + f, existing +;; (* Create an alloca for each argument and register the argument in the symbol * table so that references to it will succeed. *) @@ -278,47 +280,50 @@ let create_argument_allocas the_function proto = | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args in Array.iteri (Llvm.params the_function) ~f:(fun i ai -> - let var_name = List.nth_exn args i in - (* Create an alloca for this variable. *) - let alloca = create_entry_block_alloca the_function var_name in - (* Store the initial value into the alloca. *) - Llvm.build_store ai alloca builder |> ignore; - (* Add arguments to variable symbol table. *) - Hashtbl.set named_values ~key:var_name ~data:alloca) + let var_name = List.nth_exn args i in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + (* Store the initial value into the alloca. *) + Llvm.build_store ai alloca builder |> ignore; + (* Add arguments to variable symbol table. *) + Hashtbl.set named_values ~key:var_name ~data:alloca) +;; let codegen_func the_fpm = function - | Ast.Function (proto, body) -> ( - let the_function, existing = codegen_proto_existing proto in - (* If this is an operator, install it. *) - (match proto with - | Ast.BinOpPrototype (name, _args, prec) -> - let op = name.[String.length name - 1] in - Hashtbl.add_exn Ast.binop_precedence ~key:op ~data:prec - | _ -> ()); - (* Create a new basic block to start insertion into. *) - let bb = Llvm.append_block context "entry" the_function in - Llvm.position_at_end bb builder; - try - (* Add all arguments to the symbol table and create their allocas. *) - create_argument_allocas the_function proto; - let return_val = codegen_expr body in - (* Finish off the function. *) - let (_ : Llvm.llvalue) = Llvm.build_ret return_val builder in - (* Validate the generated code, checking for consistency. *) - (match Llvm_analysis.verify_function the_function with + | Ast.Function (proto, body) -> + let the_function, existing = codegen_proto_existing proto in + (* If this is an operator, install it. *) + (match proto with + | Ast.BinOpPrototype (name, _args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add_exn Ast.binop_precedence ~key:op ~data:prec + | _ -> ()); + (* Create a new basic block to start insertion into. *) + let bb = Llvm.append_block context "entry" the_function in + Llvm.position_at_end bb builder; + (try + (* Add all arguments to the symbol table and create their allocas. *) + create_argument_allocas the_function proto; + let return_val = codegen_expr body in + (* Finish off the function. *) + let (_ : Llvm.llvalue) = Llvm.build_ret return_val builder in + (* Validate the generated code, checking for consistency. *) + (match Llvm_analysis.verify_function the_function with | true -> () | false -> - Caml.Format.printf "invalid function generated\n%s\n" - (Llvm.string_of_llvalue the_function); - Llvm_analysis.assert_valid_function the_function); - (* Optimize the function. *) - let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in - the_function - with e -> - (match existing with + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_function); + Llvm_analysis.assert_valid_function the_function); + (* Optimize the function. *) + let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in + the_function + with + | e -> + (match existing with | `Not_existing -> Llvm.delete_function the_function - | `Existing -> - Array.iter (Llvm.basic_blocks the_function) ~f:Llvm.delete_block); - raise e) + | `Existing -> Array.iter (Llvm.basic_blocks the_function) ~f:Llvm.delete_block); + raise e) +;; let codegen_proto proto = codegen_proto_existing proto |> fst diff --git a/lib/codegen.mli b/lib/codegen.mli index 24e8820..73a273f 100644 --- a/lib/codegen.mli +++ b/lib/codegen.mli @@ -1,3 +1,5 @@ +open Lang_ks + val the_module : Llvm.llmodule val codegen_proto : Ast.proto -> Llvm.llvalue val codegen_func : [ `Function ] Llvm.PassManager.t -> Ast.func -> Llvm.llvalue diff --git a/lib/dune b/lib/dune index f22c330..17f8b7d 100644 --- a/lib/dune +++ b/lib/dune @@ -9,13 +9,8 @@ llvm.scalar_opts llvm.all_backends stdio - menhirLib) + menhirLib + lang_ks) (preprocess (pps ppx_jane ppx_expect ppx_let)) (inline_tests)) - -(ocamllex lexer) - -(menhir - (flags --external-tokens Ast --explain --table) - (modules parser)) diff --git a/lib/kaleidoscope_lib.ml b/lib/kaleidoscope_lib.ml index fc85bed..162c209 100644 --- a/lib/kaleidoscope_lib.ml +++ b/lib/kaleidoscope_lib.ml @@ -1,4 +1,4 @@ module Codegen = Codegen -module Lexer = Lexer -module Parser = Parser +module Lexer = Lang_ks.Lexer +module Parser = Lang_ks.Parser module Toplevel = Toplevel diff --git a/lib/parse_tests.mli b/lib/parse_tests.mli deleted file mode 100644 index 6130296..0000000 --- a/lib/parse_tests.mli +++ /dev/null @@ -1 +0,0 @@ -(*_ mli intentionally blank for test file *) diff --git a/lib/toplevel.ml b/lib/toplevel.ml index 356b744..b4ff06c 100644 --- a/lib/toplevel.ml +++ b/lib/toplevel.ml @@ -1,4 +1,5 @@ open Base +open Lang_ks let dump_to_object ~the_fpm = Llvm_all_backends.initialize (); @@ -8,27 +9,26 @@ let dump_to_object ~the_fpm = let cpu = "generic" in let reloc_mode = Llvm_target.RelocMode.Default in let machine = - Llvm_target.TargetMachine.create ~triple:target_triple ~cpu ~reloc_mode - target + Llvm_target.TargetMachine.create ~triple:target_triple ~cpu ~reloc_mode target in let data_layout = - Llvm_target.TargetMachine.data_layout machine - |> Llvm_target.DataLayout.as_string + Llvm_target.TargetMachine.data_layout machine |> Llvm_target.DataLayout.as_string in Llvm.set_target_triple target_triple Codegen.the_module; Llvm.set_data_layout data_layout Codegen.the_module; let filename = "output.o" in Llvm_target.TargetMachine.add_analysis_passes the_fpm machine; let file_type = Llvm_target.CodeGenFileType.ObjectFile in - Llvm_target.TargetMachine.emit_to_file Codegen.the_module file_type filename - machine; + Llvm_target.TargetMachine.emit_to_file Codegen.the_module file_type filename machine; (* printf "Wrote %s\n" filename; *) () +;; let run_main in_channel ~the_fpm ~the_execution_engine = let anonymous_func_count = ref 0 in let supplier = - Parser.MenhirInterpreter.lexer_lexbuf_to_supplier Lexer.read + Parser.MenhirInterpreter.lexer_lexbuf_to_supplier + Lexer.read (Lexing.from_channel in_channel) in let rec run_loop the_fpm the_execution_engine supplier = @@ -39,20 +39,17 @@ let run_main in_channel ~the_fpm ~the_execution_engine = (try match Parser.MenhirInterpreter.loop supplier incremental with | `Expr ast -> - (* printf "parsed a toplevel expression" ; *) - (* Evaluate a top-level expression into an anonymous function. *) - let func = Ast.func_of_no_binop_func ast in - Out_channel.flush Stdio.stdout; - Llvm_executionengine.add_module Codegen.the_module - the_execution_engine; - anonymous_func_count := !anonymous_func_count + 1; - let _tmp_name = - Printf.sprintf "__toplevel%d" !anonymous_func_count - in - let tmp_func = Ast.set_func_name "main" func in - let _the_function = Codegen.codegen_func the_fpm tmp_func in - () - (* Llvm.dump_value the_function ; + (* printf "parsed a toplevel expression" ; *) + (* Evaluate a top-level expression into an anonymous function. *) + let func = Ast.func_of_no_binop_func ast in + Out_channel.flush Stdio.stdout; + Llvm_executionengine.add_module Codegen.the_module the_execution_engine; + anonymous_func_count := !anonymous_func_count + 1; + let _tmp_name = Printf.sprintf "__toplevel%d" !anonymous_func_count in + let tmp_func = Ast.set_func_name "main" func in + let _the_function = Codegen.codegen_func the_fpm tmp_func in + () + (* Llvm.dump_value the_function ; (* JIT the function, returning a function pointer. *) let fp = Llvm_executionengine.get_function_address tmp_name @@ -63,37 +60,39 @@ let run_main in_channel ~the_fpm ~the_execution_engine = Llvm_executionengine.remove_module Codegen.the_module the_execution_engine *) | `Extern ext -> - (* printf "parsed an extern" ; + (* printf "parsed an extern" ; printf !"%{sexp: Ast.proto}\n" ext; *) - Out_channel.flush Stdio.stdout; - let _code = Codegen.codegen_proto ext in - () - (* Llvm.dump_value (Codegen.codegen_proto ext) *) + Out_channel.flush Stdio.stdout; + let _code = Codegen.codegen_proto ext in + () + (* Llvm.dump_value (Codegen.codegen_proto ext) *) | `Def def -> - (* printf "parsed a definition" ; *) - let func = Ast.func_of_no_binop_func def in - (* printf !"%{sexp: Ast.func}\n" func; *) - Out_channel.flush Stdio.stdout; - let _code = Codegen.codegen_func the_fpm func in - () - (* Llvm.dump_value code *) + (* printf "parsed a definition" ; *) + let func = Ast.func_of_no_binop_func def in + (* printf !"%{sexp: Ast.func}\n" func; *) + Out_channel.flush Stdio.stdout; + let _code = Codegen.codegen_func the_fpm func in + () + (* Llvm.dump_value code *) | `Eof -> - (* printf "\n\n" ; - printf "reached eof\n" ; *) - (* printf "module dump:\n" ; *) - Out_channel.flush Out_channel.stdout; - (* Add ch8 *) - dump_to_object ~the_fpm; - (* Print out all the generated code. *) - Llvm.dump_module Codegen.the_module; - Caml.exit 0 - with e -> + (* printf "\n\n" ; + printf "reached eof\n" ; *) + (* printf "module dump:\n" ; *) + Out_channel.flush Out_channel.stdout; + (* Add ch8 *) + dump_to_object ~the_fpm; + (* Print out all the generated code. *) + Llvm.dump_module Codegen.the_module; + Stdlib.exit 0 + with + | e -> (* Skip expression for error recovery. *) - Caml.Printf.printf !"\nencountered an error %{sexp: exn}" e); + Stdlib.Printf.printf !"\nencountered an error %{sexp: exn}" e); Out_channel.flush Out_channel.stdout; run_loop the_fpm the_execution_engine supplier in run_loop the_fpm the_execution_engine supplier +;; let main input = (* Install standard binary operators. @@ -106,8 +105,8 @@ let main input = (* Create the JIT *) let the_execution_engine = (match Llvm_executionengine.initialize () with - | true -> () - | false -> raise_s [%message "failed to initialize"]); + | true -> () + | false -> raise_s [%message "failed to initialize"]); Llvm_executionengine.create Codegen.the_module in let the_fpm = Llvm.PassManager.create_function Codegen.the_module in @@ -124,5 +123,5 @@ let main input = Llvm.PassManager.initialize the_fpm |> ignore; match input with | `Stdin -> run_main ~the_execution_engine ~the_fpm In_channel.stdin - | `File file -> - In_channel.with_open_text file (run_main ~the_execution_engine ~the_fpm) + | `File file -> In_channel.with_open_text file (run_main ~the_execution_engine ~the_fpm) +;;