diff --git a/CMakeLists.txt b/CMakeLists.txt index 8764449..f7e1fde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 4.0) +cmake_minimum_required(VERSION 3.16) project(cir CXX) set(CMAKE_CXX_STANDARD 23) @@ -36,6 +36,9 @@ add_executable(decbc tools/debugger/main.cpp) target_link_libraries(decbc PRIVATE cir_shared) target_include_directories(decbc PRIVATE ${CMAKE_SOURCE_DIR}) +add_executable(bfcas tools/bf/main.cpp) +target_include_directories(bfcas PRIVATE ${CMAKE_SOURCE_DIR}) + add_subdirectory(examples/dl-imports EXCLUDE_FROM_ALL) add_custom_target(run diff --git a/core/bf.h b/core/bf.h new file mode 100644 index 0000000..5c1193b --- /dev/null +++ b/core/bf.h @@ -0,0 +1,240 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "cir.h" + +// Brainfuck to CIR compiler. +// +// Register assignments: +// r0 - I/O register (argument/return for std.putchar / std.getchar) +// r1 - data pointer (address of the current tape cell) +// r2 - temporary cell value +// r3 - tape base pointer (saved for free on exit) +// +// Jump target encoding: stored as (instruction_index - 1) because +// execute_function does fn.co = target; fn.co++ after each op. 
+class BrainfuckCompiler
+{
+    // Builds an Op of the given type with all three argument slots filled in.
+    static Op make_op(OpType type, Word first, Word second, Word third)
+    {
+        Op result;
+        result.type = type;
+        result.args[0] = std::move(first);
+        result.args[1] = std::move(second);
+        result.args[2] = std::move(third);
+        return result;
+    }
+
+    // Two-operand form: the third slot is filled with a null Word.
+    static Op make_op(OpType type, Word first, Word second)
+    {
+        Word unused = Word::from_null();
+        return make_op(type, std::move(first), std::move(second), std::move(unused));
+    }
+
+    // One-operand form: the second and third slots are filled with null Words.
+    static Op make_op(OpType type, Word first)
+    {
+        return make_op(type, std::move(first), Word::from_null());
+    }
+
+    // Operand-less form: every slot is filled with a null Word.
+    static Op make_op(OpType type)
+    {
+        return make_op(type, Word::from_null());
+    }
+
+    // Appends `load r2, r1, $1` to fn: r2 = *r1, with a size operand of 1.
+    static void emit_load_cell(Function& fn)
+    {
+        Op load = make_op(OpType::Load, Word::from_reg(2), Word::from_reg(1), Word::from_int(1));
+        fn.ops.push_back(std::move(load));
+    }
+
+    // Appends `store r1, r2, $1` to fn: *r1 = r2, with a size operand of 1.
+    static void emit_store_cell(Function& fn)
+    {
+        Op store = make_op(OpType::Store, Word::from_reg(1), Word::from_reg(2), Word::from_int(1));
+        fn.ops.push_back(std::move(store));
+    }
+
+    // Appends `cmp r2, $0` to fn.
+    static void emit_cmp_zero(Function& fn)
+    {
+        Op compare = make_op(OpType::Cmp, Word::from_reg(2), Word::from_int(0));
+        fn.ops.push_back(std::move(compare));
+    }
+
+public:
+    // Compile a Brainfuck source string into a CIR Program.
+    //
+    // The emitted "main" function consists of:
+    //   - a prologue:  alloc r3, $30000 ; mov r3, r1
+    //   - a fixed instruction sequence for every Brainfuck command in `source`
+    //   - an epilogue: free r3 ; halt
+    // Every character that is not a Brainfuck command is skipped, so it acts
+    // as a comment.
+    //
+    // Jump operands are written as (target instruction index - 1).
+    //
+    // Returns a Program that holds "main" and lists std.putchar / std.getchar
+    // as required externs.
+    // Throws std::runtime_error if '[' and ']' are not balanced.
+    Program compile(const std::string& source)
+    {
+        Program prog;
+        Function main_func;
+        auto& ops = main_func.ops;
+
+        // alloc r3, $30000   ; allocate the tape
+        // NOTE(review): the cells are never cleared here, so this relies on
+        // Alloc handing back zero-filled memory - confirm against the heap.
+        ops.push_back(make_op(OpType::Alloc,
+            Word::from_reg(3),
+            Word::from_int(30000)
+        ));
+
+        // mov r3, r1         ; data pointer starts at the tape base
+        ops.push_back(make_op(OpType::Mov,
+            Word::from_reg(3),
+            Word::from_reg(1)
+        ));
+
+        // Index of the placeholder `je` emitted for each still-open `[`.
+        std::stack<size_t> loop_stack;
+
+        for (char c : source)
+        {
+            switch (c)
+            {
+                case '>':
+                    // add r1, r1, $1
+                    ops.push_back(make_op(OpType::Add,
+                        Word::from_reg(1),
+                        Word::from_reg(1),
+                        Word::from_int(1)
+                    ));
+                    break;
+
+                case '<':
+                    // sub r1, r1, $1
+                    ops.push_back(make_op(OpType::Sub,
+                        Word::from_reg(1),
+                        Word::from_reg(1),
+                        Word::from_int(1)
+                    ));
+                    break;
+
+                case '+':
+                    // load r2, r1, $1 / add r2, r2, $1 / store r1, r2, $1
+                    emit_load_cell(main_func);
+                    ops.push_back(make_op(OpType::Add,
+                        Word::from_reg(2),
+                        Word::from_reg(2),
+                        Word::from_int(1)
+                    ));
+                    emit_store_cell(main_func);
+                    break;
+
+                case '-':
+                    // load r2, r1, $1 / sub r2, r2, $1 / store r1, r2, $1
+                    emit_load_cell(main_func);
+                    ops.push_back(make_op(OpType::Sub,
+                        Word::from_reg(2),
+                        Word::from_reg(2),
+                        Word::from_int(1)
+                    ));
+                    emit_store_cell(main_func);
+                    break;
+
+                case '.':
+                    // load r2, r1, $1 / mov r2, r0 / callx std.putchar
+                    emit_load_cell(main_func);
+                    ops.push_back(make_op(OpType::Mov,
+                        Word::from_reg(2),
+                        Word::from_reg(0)
+                    ));
+                    ops.push_back(make_op(OpType::CallExtern,
+                        Word::from_string_owned("std.putchar")
+                    ));
+                    break;
+
+                case ',':
+                    // callx std.getchar / store r1, r0, $1
+                    ops.push_back(make_op(OpType::CallExtern,
+                        Word::from_string_owned("std.getchar")
+                    ));
+                    ops.push_back(make_op(OpType::Store,
+                        Word::from_reg(1),
+                        Word::from_reg(0),
+                        Word::from_int(1)
+                    ));
+                    break;
+
+                case '[':
+                {
+                    // load r2, r1, $1 / cmp r2, $0 / je <loop end>
+                    emit_load_cell(main_func);
+                    emit_cmp_zero(main_func);
+
+                    // Remember where the `je` lives; its operand is a
+                    // placeholder until the matching `]` patches it.
+                    loop_stack.push(ops.size());
+                    ops.push_back(make_op(OpType::Je,
+                        Word::from_int(-1)
+                    ));
+                    break;
+                }
+
+                case ']':
+                {
+                    if (loop_stack.empty())
+                    {
+                        throw std::runtime_error("Unmatched ']' in Brainfuck source");
+                    }
+                    const size_t je_index = loop_stack.top();
+                    loop_stack.pop();
+
+                    // The `[` sequence is load, cmp, je, so its load sits at
+                    // je_index - 2. Jumping back to it needs the operand
+                    // (je_index - 2) - 1 = je_index - 3.
+                    // NOTE(review): the cast target is assumed to be int,
+                    // matching the int literals passed to Word::from_int above.
+                    emit_load_cell(main_func);
+                    emit_cmp_zero(main_func);
+                    ops.push_back(make_op(OpType::Jne,
+                        Word::from_int(static_cast<int>(je_index) - 3)
+                    ));
+
+                    // The instruction after the `jne` is the loop exit; point
+                    // the `[`'s `je` at it (operand = exit index - 1).
+                    const size_t loop_end = ops.size();
+                    ops[je_index].args[0] =
+                        Word::from_int(static_cast<int>(loop_end) - 1);
+                    break;
+                }
+
+                default:
+                    break; // ignore non-BF characters (comments)
+            }
+        }
+
+        if (!loop_stack.empty())
+        {
+            throw std::runtime_error("Unmatched '[' in Brainfuck source");
+        }
+
+        // free r3            ; release tape
+        ops.push_back(make_op(OpType::Free, Word::from_reg(3)));
+
+        // halt
+        ops.push_back(make_op(OpType::Halt));
+
+        prog.functions["main"] = std::move(main_func);
+        prog.required_externs = {"std.putchar", "std.getchar"};
+        return prog;
+    }
+
+    // Compile a Brainfuck source file into a CIR Program.
+ Program compile_file(const std::string& filename) + { + std::ifstream file(filename); + if (!file.is_open()) + { + throw std::runtime_error("Failed to open file: " + filename); + } + std::ostringstream oss; + oss << file.rdbuf(); + return compile(oss.str()); + } +}; diff --git a/core/cir.h b/core/cir.h index 0d6d5ea..209eaf2 100644 --- a/core/cir.h +++ b/core/cir.h @@ -234,6 +234,7 @@ CIR_INLINE CIR_API void CIR::push(const Word& value) CIR_INLINE CIR_API void CIR::move(const Word& w, uint16_t i) { registers[i] = w; + registers[i].set_flag(WordFlag::Register); } CIR_INLINE CIR_API Word& CIR::getr(uint16_t i) @@ -282,123 +283,112 @@ CIR_API void CIR::execute_op(Function& fn, Op op) // (reg) case OpType::Pop: { - Word& r = getr(op.args[0].as_int()); - r.expect_flag(WordFlag::Register); - r = pop(); + op.args[0].expect_flag(WordFlag::Register); + move(pop(), static_cast(op.args[0].as_int())); } break; // (dest, imm/reg, imm/reg) case OpType::Add: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a + b; + move(a + b, static_cast(op.args[0].as_int())); } break; // (dest, imm/reg, imm/reg) case OpType::Sub: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a - b; + move(a - b, static_cast(op.args[0].as_int())); } break; // (dest, imm/reg, imm/reg) case OpType::Mul: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a * b; + move(a * b, static_cast(op.args[0].as_int())); } break; // (dest, imm/reg, imm/reg) case OpType::Div: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); 
Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a / b; + move(a / b, static_cast(op.args[0].as_int())); } break; case OpType::Mod: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a % b; + move(a % b, static_cast(op.args[0].as_int())); } break; case OpType::And: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a & b; + move(a & b, static_cast(op.args[0].as_int())); } break; case OpType::Or: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a | b; + move(a | b, static_cast(op.args[0].as_int())); } break; // TODO: this and following need implementation case OpType::Xor: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a ^ b; + move(a ^ b, static_cast(op.args[0].as_int())); } break; case OpType::Not: { - Word& dest = getr(op.args[0].as_int()); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); a.expect(WordType::Integer); - dest = Word::from_int(~a.as_int()); + move(Word::from_int(~a.as_int()), static_cast(op.args[0].as_int())); } break; case OpType::Shl: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); Word& b = go(op.args[2]); - dest = a << b; + move(a << b, static_cast(op.args[0].as_int())); } break; case OpType::Shr: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); Word& a = go(op.args[1]); 
Word& b = go(op.args[2]); - dest = a >> b; + move(a >> b, static_cast(op.args[0].as_int())); } break; @@ -478,31 +468,34 @@ CIR_API void CIR::execute_op(Function& fn, Op op) case OpType::Inc: { - Word& r = go(op.args[0]); - r.expect_flag(WordFlag::Register); - ++r; + op.args[0].expect_flag(WordFlag::Register); + uint16_t idx = static_cast(op.args[0].as_int()); + Word incremented = ++registers[idx]; + move(incremented, idx); } break; case OpType::Dec: { - Word& r = go(op.args[0]); - r.expect_flag(WordFlag::Register); - --r; + op.args[0].expect_flag(WordFlag::Register); + uint16_t idx = static_cast(op.args[0].as_int()); + Word decremented = --registers[idx]; + move(decremented, idx); } break; case OpType::Neg: { op.args[0].expect_flag(WordFlag::Register); - Word& a = getr(op.args[0].as_int()); + uint16_t idx = static_cast(op.args[0].as_int()); + Word& a = getr(idx); if (a.type == WordType::Integer) { - a = Word::from_int(-a.as_int()); + move(Word::from_int(-a.as_int()), idx); } else if (a.type == WordType::Float) { - a = Word::from_float(-a.as_float()); + move(Word::from_float(-a.as_float()), idx); } else { @@ -513,8 +506,9 @@ CIR_API void CIR::execute_op(Function& fn, Op op) case OpType::Cast: { - Word& dest = getr(op.args[1].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[1].expect_flag(WordFlag::Register); + uint16_t dest_idx = static_cast(op.args[1].as_int()); + Word& dest = getr(dest_idx); Word& target_type = go(op.args[0]); target_type.expect_flag(WordFlag::String); @@ -525,11 +519,11 @@ CIR_API void CIR::execute_op(Function& fn, Op op) { if (strcmp(type_str, "float") == 0) { - dest = Word::from_float(static_cast(dest.as_int())); + move(Word::from_float(static_cast(dest.as_int())), dest_idx); } else if (strcmp(type_str, "ptr") == 0) { - dest = Word::from_ptr(reinterpret_cast(dest.as_int())); + move(Word::from_ptr(reinterpret_cast(dest.as_int())), dest_idx); } else if (strcmp(type_str, "int") != 0) { @@ -540,7 +534,7 @@ CIR_API void 
CIR::execute_op(Function& fn, Op op) { if (strcmp(type_str, "int") == 0) { - dest = Word::from_int(static_cast(dest.as_float())); + move(Word::from_int(static_cast(dest.as_float())), dest_idx); } else if (strcmp(type_str, "float") != 0) { @@ -551,7 +545,7 @@ CIR_API void CIR::execute_op(Function& fn, Op op) { if (strcmp(type_str, "int") == 0) { - dest = Word::from_int(reinterpret_cast(dest.as_ptr())); + move(Word::from_int(reinterpret_cast(dest.as_ptr())), dest_idx); } else if (strcmp(type_str, "ptr") != 0) { @@ -627,8 +621,8 @@ CIR_API void CIR::execute_op(Function& fn, Op op) // (dest_reg, address, size) case OpType::Load: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); + uint16_t dest_idx = static_cast(op.args[0].as_int()); Word& addr = go(op.args[1]); addr.expect(WordType::Pointer); @@ -647,16 +641,21 @@ CIR_API void CIR::execute_op(Function& fn, Op op) switch (byte_size) { case 1: - dest = Word::from_int(*static_cast(ptr)); + move(Word::from_int(*static_cast(ptr)), dest_idx); break; case 2: - dest = Word::from_int(*static_cast(ptr)); + move(Word::from_int(*static_cast(ptr)), dest_idx); break; case 4: - dest = Word::from_int(*static_cast(ptr)); + move(Word::from_int(*static_cast(ptr)), dest_idx); break; case 8: - std::memcpy(&dest.data, ptr, 8); + { + Word w; + std::memcpy(&w.data, ptr, 8); + w.type = WordType::Integer; + move(w, dest_idx); + } break; default: throw std::runtime_error("Load: unsupported size " + std::to_string(byte_size)); @@ -713,10 +712,9 @@ CIR_API void CIR::execute_op(Function& fn, Op op) case OpType::Alloc: { op.args[0].expect_flag(WordFlag::Register); - Word& dest = getr(op.args[0].as_int()); Word& x = go(op.args[1]); x.expect(WordType::Integer); - dest = Word::from_ptr(heap.allocate(x.as_int())); + move(Word::from_ptr(heap.allocate(x.as_int())), static_cast(op.args[0].as_int())); } break; @@ -728,8 +726,8 @@ CIR_API void CIR::execute_op(Function& fn, Op op) 
case OpType::Lea: { - Word& dest = getr(op.args[0].as_int()); - dest.expect_flag(WordFlag::Register); + op.args[0].expect_flag(WordFlag::Register); + uint16_t dest_idx = static_cast(op.args[0].as_int()); Word& base = go(op.args[1]); Word& offset = go(op.args[2]); @@ -751,7 +749,7 @@ CIR_API void CIR::execute_op(Function& fn, Op op) throw std::runtime_error("lea: base must be pointer or integer"); } - dest = Word::from_ptr(address); + move(Word::from_ptr(address), dest_idx); } break; diff --git a/core/std.h b/core/std.h index e5089c1..a35eb0f 100644 --- a/core/std.h +++ b/core/std.h @@ -2,6 +2,7 @@ #define STD_H #include +#include #include "cir.h" @@ -15,6 +16,19 @@ namespace cir_std std::cout << std::endl; } + void putchar_fn(CIR& cir) + { + Word& r = cir.getr(0); + r.expect(WordType::Integer); + std::putchar(static_cast(r.as_int())); + } + + void getchar_fn(CIR& cir) + { + int c = std::getchar(); + cir.move(Word::from_int(c == EOF ? -1 : c), 0); + } + namespace list { struct List @@ -86,6 +100,8 @@ namespace cir_std void init_std(CIR& cir) { cir.set_extern_fn("std.print", print); + cir.set_extern_fn("std.putchar", putchar_fn); + cir.set_extern_fn("std.getchar", getchar_fn); list::register_list(cir); } } diff --git a/examples/bf/hello.bf b/examples/bf/hello.bf new file mode 100644 index 0000000..8fa0f72 --- /dev/null +++ b/examples/bf/hello.bf @@ -0,0 +1 @@ +++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++. 
diff --git a/tools/bf/main.cpp b/tools/bf/main.cpp
new file mode 100644
index 0000000..bffda69
--- /dev/null
+++ b/tools/bf/main.cpp
@@ -0,0 +1,127 @@
+#define CIR_IMPLEMENTATION
+#include <chrono>
+#include <filesystem>
+#include <iostream>
+
+#include "../../core/bf.h"
+#include "../../core/std.h"
+
+namespace fs = std::filesystem;
+
+// Prints the command-line usage text to stdout.
+// `prog` is the program name shown in the usage line (argv[0] at the call sites).
+static void print_help(const char* prog)
+{
+    std::cout << "Usage: " << prog << " <file.bf> [options]\n\n"
+              << "Compile and run a Brainfuck program on the CIR virtual machine.\n\n"
+              << "Options:\n"
+              << " -b, --benchmark Show execution time\n"
+              << " -q, --quiet Suppress info messages\n"
+              << " -h, --help Show this help message\n"
+              << " --version Show version information\n";
+}
+
+// Entry point: parses the command line, compiles the given Brainfuck file to a
+// CIR program and runs it.
+// Returns 0 on success or when only help/version text was printed, and 1 on a
+// usage error, a missing input file, or any std::exception raised while
+// compiling or executing.
+int main(int argc, char* argv[])
+{
+    if (argc < 2)
+    {
+        print_help(argv[0]);
+        return 0;
+    }
+
+    std::string input_file;
+    bool benchmark = false;
+    bool quiet = false;
+
+    for (int i = 1; i < argc; ++i)
+    {
+        std::string arg(argv[i]);
+
+        if (arg == "-h" || arg == "--help")
+        {
+            print_help(argv[0]);
+            return 0;
+        }
+        else if (arg == "--version")
+        {
+            std::cout << "bfcas (CIR Brainfuck) v" << Config::VERSION << "\n"
+                      << "Copyright (c) 2025, " << Config::AUTHORS << "\n";
+            return 0;
+        }
+        else if (arg == "-b" || arg == "--benchmark")
+        {
+            benchmark = true;
+        }
+        else if (arg == "-q" || arg == "--quiet")
+        {
+            quiet = true;
+        }
+        else if (arg[0] == '-')
+        {
+            std::cerr << "[ERROR] Unknown option: " << arg << "\n";
+            return 1;
+        }
+        else
+        {
+            // Anything that is not an option is the input path; only one is allowed.
+            if (!input_file.empty())
+            {
+                std::cerr << "[ERROR] Multiple input files specified\n";
+                return 1;
+            }
+            input_file = arg;
+        }
+    }
+
+    if (input_file.empty())
+    {
+        std::cerr << "[ERROR] No input file specified\n";
+        return 1;
+    }
+
+    if (!fs::exists(input_file))
+    {
+        std::cerr << "[ERROR] File not found: " << input_file << "\n";
+        return 1;
+    }
+
+    try
+    {
+        if (!quiet) std::cout << "[INFO] Compiling: " << input_file << "\n";
+
+        BrainfuckCompiler compiler;
+        Program prog = compiler.compile_file(input_file);
+
+        if (!quiet) std::cout << "[INFO] Running...\n";
+
+        CIR cir;
+        cir_std::init_std(cir);
+        cir.load_program(std::move(prog));
+
+        // steady_clock is monotonic, so the measured interval cannot be skewed
+        // by wall-clock adjustments made while the program runs.
+        const auto start = std::chrono::steady_clock::now();
+        cir.execute_program();
+        const auto end = std::chrono::steady_clock::now();
+
+        if (!quiet) std::cout << "\n[SUCCESS] Done\n";
+
+        if (benchmark)
+        {
+            const auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
+            std::cout << "Execution time: " << us << " μs\n";
+        }
+
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "[ERROR] " << e.what() << "\n";
+        return 1;
+    }
+}