diff --git a/.github/workflows/ubuntu-build.yml b/.github/workflows/ubuntu-build.yml index 4737f70..8c43b54 100644 --- a/.github/workflows/ubuntu-build.yml +++ b/.github/workflows/ubuntu-build.yml @@ -16,8 +16,8 @@ jobs: build_type: [Release, Debug] include: - compiler: gcc - cc: gcc-14 - cxx: g++-14 + cc: gcc-15 + cxx: g++-15 - compiler: clang cc: clang-21 cxx: clang++-21 @@ -25,11 +25,12 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install GCC 14 + - name: Install GCC 15 if: matrix.compiler == 'gcc' run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt-get update - sudo apt-get install -y g++-14 + sudo apt-get install -y ${{ matrix.cc }} ${{ matrix.cxx }} - name: Install Clang 21 if: matrix.compiler == 'clang' @@ -39,7 +40,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 21 - sudo apt-get install -y clang-21 + sudo apt-get install -y ${{ matrix.cc }} - name: Configure & Build run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a18693..1d641d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,10 +38,14 @@ add_library(korka_lib include/korka/compiler/parser.hpp include/korka/shared.hpp include/korka/compiler/ast_walker.hpp - include/korka/compiler/error.hpp + include/korka/shared/error.hpp include/korka/compiler/lex_token.hpp include/korka/utils/const_format.hpp include/korka/compiler/compiler.hpp + include/korka/utils/overloaded.hpp + include/korka/shared/types.hpp + include/korka/shared/flat_map.hpp + include/korka/utils/frozen_hash_string_view.hpp ) target_include_directories(korka_lib @@ -57,20 +61,20 @@ add_executable(pxkorka main.cpp) target_link_libraries(pxkorka PRIVATE korka_lib) # --- TESTS --- -if (ENABLE_TESTS) - enable_testing() - - add_executable(pxkorka_tests - test/lexer.cpp - test/bytecode_builder.cpp - test/parser.cpp - ) - - target_link_libraries(pxkorka_tests - PRIVATE - korka_lib - Catch2WithMain - ) - - catch_discover_tests(pxkorka_tests) -endif () \ No newline at end of file 
+#if (ENABLE_TESTS) +# enable_testing() +# +# add_executable(pxkorka_tests +# test/lexer.cpp +# test/bytecode_builder.cpp +# test/parser.cpp +# ) +# +# target_link_libraries(pxkorka_tests +# PRIVATE +# korka_lib +# Catch2WithMain +# ) +# +# catch_discover_tests(pxkorka_tests) +#endif () \ No newline at end of file diff --git a/include/korka/compiler/ast_walker.hpp b/include/korka/compiler/ast_walker.hpp index b91ab5c..452afe0 100644 --- a/include/korka/compiler/ast_walker.hpp +++ b/include/korka/compiler/ast_walker.hpp @@ -1,15 +1,13 @@ #pragma once #include "parser.hpp" +#include "korka/utils/overloaded.hpp" #include #include namespace korka { - template struct overloaded : Ts... { using Ts::operator()...; }; - template overloaded(Ts...) -> overloaded; - struct ast_walker { - const std::span &pool; - parser::index_t index; + const std::span &pool; + nodes::index_t index; int indent = 0; }; } @@ -21,7 +19,7 @@ struct std::formatter { auto format(const korka::ast_walker& w, std::format_context& ctx) const { auto out = ctx.out(); - if (w.index == korka::parser::empty_node) { + if (w.index == korka::nodes::empty_node) { return std::format_to(out, "{}", std::string(w.indent * 2, ' ')); } @@ -32,7 +30,7 @@ struct std::formatter { const auto& node = w.pool[w.index]; std::string spaces(w.indent * 2, ' '); - auto fmt_child = [&](std::string_view label, korka::parser::index_t child_idx) { + auto fmt_child = [&](std::string_view label, korka::nodes::index_t child_idx) { std::format_to(out, "\n{}{}:", spaces, label); std::format_to(out, "\n{}", korka::ast_walker{w.pool, child_idx, w.indent + 1}); }; @@ -41,7 +39,7 @@ struct std::formatter { using namespace korka; std::visit(overloaded{ - [&](const parser::expr_literal& lit) { + [&](const nodes::expr_literal& lit) { std::visit(overloaded{ [&](std::monostate) { out = std::format_to(out, "null"); @@ -51,70 +49,70 @@ struct std::formatter { } }, lit); }, - [&](const parser::expr_var& v) { + [&](const nodes::expr_var& v) { out = 
std::format_to(out, "Var '{}'", v.name); }, - [&](const parser::expr_unary& v) { + [&](const nodes::expr_unary& v) { out = std::format_to(out, "Unary '{}'", v.op); fmt_child("child", v.child); }, - [&](const parser::expr_binary& v) { + [&](const nodes::expr_binary& v) { out = std::format_to(out, "Binary '{}'", v.op); fmt_child("L", v.left); fmt_child("R", v.right); }, - [&](const parser::expr_call& v) { + [&](const nodes::expr_call& v) { out = std::format_to(out, "Call '{}'", v.name); - if (v.args_head != parser::empty_node) { + if (v.args_head != nodes::empty_node) { fmt_child("args", v.args_head); } }, - [&](const parser::stmt_block& v) { + [&](const nodes::stmt_block& v) { out = std::format_to(out, "Block"); - if (v.children_head != parser::empty_node) { + if (v.children_head != nodes::empty_node) { fmt_child("body", v.children_head); } }, - [&](const parser::stmt_if& v) { + [&](const nodes::stmt_if& v) { out = std::format_to(out, "If"); fmt_child("cond", v.condition); fmt_child("then", v.then_branch); - if (v.else_branch != parser::empty_node) { + if (v.else_branch != nodes::empty_node) { fmt_child("else", v.else_branch); } }, - [&](const parser::stmt_while& v) { + [&](const nodes::stmt_while& v) { out = std::format_to(out, "While"); fmt_child("cond", v.condition); fmt_child("body", v.body); }, - [&](const parser::stmt_return& v) { + [&](const nodes::stmt_return& v) { out = std::format_to(out, "Return"); - if (v.expr != parser::empty_node) fmt_child("val", v.expr); + if (v.expr != nodes::empty_node) fmt_child("val", v.expr); }, - [&](const parser::stmt_expr& v) { + [&](const nodes::stmt_expr& v) { out = std::format_to(out, "ExprStmt"); fmt_child("expr", v.expr); }, - [&](const parser::decl_var& v) { + [&](const nodes::decl_var& v) { out = std::format_to(out, "DeclVar '{} {}'", v.type_name, v.var_name); - if (v.init_expr != parser::empty_node) { + if (v.init_expr != nodes::empty_node) { fmt_child("init", v.init_expr); } }, - [&](const parser::decl_function& v) { 
+ [&](const nodes::decl_function& v) { out = std::format_to(out, "Function '{} {}'", v.ret_type, v.name); - if (v.params_head != parser::empty_node) fmt_child("params", v.params_head); + if (v.params_head != nodes::empty_node) fmt_child("params", v.params_head); fmt_child("body", v.body); }, - [&](const parser::decl_program& v) { + [&](const nodes::decl_program& v) { out = std::format_to(out, "Program"); fmt_child("roots", v.external_declarations_head); } }, node.data); - if (node.next != parser::empty_node) { + if (node.next != nodes::empty_node) { std::format_to(out, "\n{}", korka::ast_walker{w.pool, node.next, w.indent}); } diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index 218edad..e887d6e 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -1,5 +1,499 @@ #pragma once +#include "frozen/bits/elsa.h" +#include "korka/shared/error.hpp" +#include "korka/shared/flat_map.hpp" +#include "korka/utils/overloaded.hpp" +#include "korka/vm/op_codes.hpp" +#include "parser.hpp" +#include "korka/vm/bytecode_builder.hpp" +#include "korka/utils/frozen_hash_string_view.hpp" +#include +#include +#include +#include + namespace korka { + struct void_t { + }; + + using vm::type_info; + + constexpr auto string_to_type(std::string_view name) -> type { + if (name == "int") return type::i64; + else if (name == "void") return type::void_; + // TODO: other types + return type::i64; + } + + constexpr auto type_to_string(type t) -> std::string_view { + switch (t) { + case type::void_: + return "void"; + case type::i64: + return "int"; + } + } + + struct variable_info { + std::string_view name; + type_info type; + + std::size_t locals_index; + + static constexpr auto from_node(const nodes::decl_var &node) -> variable_info { + return { + .name = node.var_name, + .type{}, + .locals_index{} + }; + } + }; + + struct function_info { + std::string_view name; + std::vector params; + type_info return_type; + + 
vm::bytecode_builder::label label; + }; + + template + struct const_function_info { + std::string_view name; + std::size_t param_count; + std::array params; + type_info return_type; + + vm::bytecode_builder::label label; + }; + + template + struct _extract_function_signature { + template + struct param_helper; + + template + struct param_helper> { + // Map each parameter info to its corresponding C++ type + using type = vm::type_info_to_cpp_t<[]{return info_getter().return_type;}>( + vm::type_info_to_cpp_t<[]{return info_getter().params[Is].type;}>... + ); + }; + + using type = typename param_helper< + std::make_index_sequence + >::type; + }; + + template + using const_function_info_to_signature_t = typename _extract_function_signature::type; + + template + constexpr auto function_info_to_const(const function_info &f) { + const_function_info info{ + .name = f.name, + .param_count = f.params.size(), + .params{}, + .return_type = f.return_type, + .label{} + }; + + std::ranges::copy(f.params, std::begin(info.params)); + return info; + } + + struct symbol_table { + struct scope { + flat_map variables; + + std::size_t current_locals_size{}; + }; + std::vector scopes; + flat_map functions; + + constexpr auto push_scope() -> void { scopes.emplace_back(); } + + constexpr auto pop_scope() -> void { scopes.pop_back(); } + + constexpr auto declare_var(std::string_view name, const type_info &type) -> std::expected { + if (scopes.empty()) { + return std::unexpected{error::other_compiler_error{ + .message = "No scope" + }}; + } + + auto ¤t = scopes.back(); + if (current.variables.contains(name)) { + return std::unexpected{error::redeclaration{ + .identifier = name + }}; + } + + variable_info info{ + .name = name, + .type = type, + .locals_index = current.current_locals_size++ + }; + + current.variables[name] = info; + return info; + } + + constexpr auto declare_function(std::string_view name, auto &&...args) -> std::expected { + functions.emplace(std::piecewise_construct, + 
std::forward_as_tuple(name), + std::forward_as_tuple(name, std::forward(args)...)); + return {}; + } + + constexpr auto lookup_variable(std::string_view name) -> std::optional { + for (auto &scp: std::ranges::reverse_view(scopes)) { + if (auto var_it = scp.variables.find(name); var_it != std::end(scp.variables)) { + return var_it->second; + } + } + return std::nullopt; + } + + constexpr auto lookup_function(std::string_view name) -> std::optional { + if (auto func_it = functions.find(name); func_it != std::end(functions)) { + return func_it->second; + } + return std::nullopt; + } + + constexpr auto clear() -> void { + scopes.clear(); + functions.clear(); + } + }; + + + struct compilation_result { + std::vector bytes; + flat_map functions; + }; + + template + struct const_compilation_result { + std::array bytes; + frozen::unordered_map, NFunctions> functions; + + template + using get_signature_t = typename SignatureMapper::template get_signature_t; + + template + constexpr auto function() const -> get_signature_t * { + return nullptr; + } + + SignatureMapper mapper{}; + }; + +template +struct unique_type{}; + + template + struct signature_mapper; + + template + struct signature_mapper> { + constexpr static auto _overloaded = overloaded{ + ([](unique_type{}(function_info_getter(Is).name, 0)>) + -> const_function_info_to_signature_t<[] {return function_info_getter(Is);}> * + { + return nullptr; + })... 
+ }; + + template + using get_signature_t = std::remove_pointer_t{}(name, 0)>{}))>; + + std::tuple *...> debug1; + std::tuple{}(function_info_getter(Is).name, 0)>...> debug2; + }; + + + template + constexpr auto compilation_result_to_const() { + // --- BYTES --- + constexpr static auto bytes = to_array<[] { return r().bytes; }>(); + + // --- FUNCTIONS --- + constexpr static auto function_count = []() constexpr { + return r().functions.size(); + }(); + constexpr static auto max_params_n = []() constexpr { + std::size_t n{}; + for (auto &&f: r().functions) { + n = std::max(n, f.second.params.size()); + } + return n; + }(); + constexpr static auto functions = []() constexpr { + std::array>, function_count> functions_data{}; + std::size_t i{}; + for (auto &&[key, value]: r().functions) { + functions_data[i++] = (std::make_pair(key, function_info_to_const(value))); + } + return frozen::make_unordered_map(functions_data); + }; + + using sign_mapper = signature_mapper<[](std::size_t i) { return (functions().begin() + i)->second; }, std::make_index_sequence>; + + return const_compilation_result{ + bytes, + functions() + }; + } + + class compiler { + public: + constexpr compiler(std::span nodes, nodes::index_t root_node) + : m_nodes(nodes), m_root_node(root_node) {} + + constexpr auto compile() -> std::expected { + m_symbols.push_scope(); + auto ok = process_node(m_root_node); + if (!ok) return std::unexpected{ok.error()}; + + return compilation_result{ + builder.build(), + m_symbols.functions + }; + } + + private: + std::span m_nodes; + nodes::index_t m_root_node; + symbol_table m_symbols; + vm::bytecode_builder builder; + + // Info for ast walker + std::optional m_current_func_ret; + + using result_t = std::expected; + + constexpr auto process_node(nodes::index_t idx) -> result_t { + const auto &node = m_nodes[idx]; + + return std::visit(overloaded{ + [&](const nodes::decl_program &program) -> result_t { + for (auto item: nodes::get_list_view(m_nodes, 
program.external_declarations_head)) { + auto ok = process_node(item); + if (not ok) { + return std::unexpected{ok.error()}; + } + } + + return type_info{type::void_}; + }, + [&](const nodes::decl_function &function) -> result_t { + type_info ret_type = string_to_type(function.ret_type); + m_current_func_ret = ret_type; + + // Function pointer + auto label = builder.make_label(); + + // Collect parameter info for the function signature + std::vector parameters; + for (auto p_idx: nodes::get_list_view(m_nodes, function.params_head)) { + const auto &p_node = std::get(m_nodes[p_idx].data); + parameters.push_back({ + .name = p_node.var_name, + .type = string_to_type(p_node.type_name), + .locals_index = 0 + }); + } + + // Register the function globally BEFORE processing the body + // This allows the function to "see" itself (recursion) + auto reg_ok = m_symbols.declare_function( + function.name, + parameters, + ret_type, + label + ); + if (not reg_ok) return std::unexpected{reg_ok.error()}; + + // Entering function scope + m_symbols.push_scope(); + m_current_func_ret = ret_type; + builder.bind_label(label); + + // Handle parameters as local variables + for (auto &¶m: parameters) { + auto ok = m_symbols.declare_var(param.name, param.type); + if (not ok) { + return std::unexpected{ok.error()}; + } + } + + // Function body + for (auto stmt: nodes::get_list_view(m_nodes, function.body)) { + if (auto res = process_node(stmt); !res) { + m_symbols.pop_scope(); // clean up + return res; + } + } + + // cleanup + m_symbols.pop_scope(); + m_current_func_ret.reset(); + + return {}; + }, + + [&](const nodes::expr_literal &lit) -> result_t { + return std::visit([&](auto val) -> result_t { + using T = decltype(val); + if constexpr (std::is_same_v) { + builder.emit_const(val); + return type_info{type::i64}; + } + return std::unexpected{ + error::other_compiler_error{.message = "This type is not supported as a literal yet"}}; + }, lit); + }, + [&](const nodes::stmt_block &block) -> 
result_t { + for (auto stmt: nodes::get_list_view(m_nodes, block.children_head)) { + if (auto res = process_node(stmt); !res) return res; + } + return {}; + }, + + [&](const nodes::stmt_return &stmt) -> result_t { + auto actual_type = process_node(stmt.expr); + if (!actual_type) return actual_type; + + // Semantic Check: Does return type match function signature? + if (m_current_func_ret && *actual_type != *m_current_func_ret) { + // TODO: proper error + return std::unexpected{error::other_compiler_error{ + .message = "Function return type mismatch" + }}; + } + + builder.emit_op(vm::op_code::ret); + return *actual_type; + }, + [&](const nodes::decl_var &var) -> result_t { + auto ok = m_symbols.declare_var(var.var_name, string_to_type(var.type_name)); + if (!ok) { + return std::unexpected{ok.error()}; + } + + if (var.init_expr != nodes::empty_node) { + auto expr = process_node(var.init_expr); + if (not expr) { + return expr; + } + builder.emit_save_local(ok->locals_index); + } + return ok->type; + }, + + [&](const nodes::expr_var &var) -> result_t { + auto info = m_symbols.lookup_variable(var.name); + if (!info) { + return std::unexpected{error::undefined_symbol{ + .identifier = var.name + }}; + } + + builder.emit_load_local(info->locals_index); + return info->type; + }, + [&](const nodes::expr_binary &expr) -> result_t { + auto left = process_node(expr.left); + if (not left) { + return left; + } + auto right = process_node(expr.right); + if (not right) { + return right; + } + + if ((*left) != (*right)) { + return std::unexpected{error::other_compiler_error{ + "Expected same types in the binary expression" + }}; + } + + auto code = vm::get_op_code_for_math(*left, *right, expr.op); + if (not code) { + return std::unexpected{code.error()}; + } + builder.emit_op(*code); + + return *left; + }, + + [&](const nodes::stmt_if &if_) -> result_t { + auto condition_expr = process_node(if_.condition); + if (not condition_expr) { + return condition_expr; + } + + auto 
else_branch_label = builder.make_label(); + auto end_label = builder.make_label(); + + if (if_.else_branch == nodes::empty_node) { + builder.emit_jmp_if_zero(end_label); + + auto then_branch = process_node(if_.then_branch); + if (not then_branch) { + return then_branch; + } + } else { + builder.emit_jmp_if_zero(else_branch_label); + + auto then_branch = process_node(if_.then_branch); + if (not then_branch) { + return then_branch; + } + + builder.emit_jmp(end_label); + + builder.bind_label(else_branch_label); + auto else_branch = process_node(if_.else_branch); + if (not else_branch) { + return else_branch; + } + } + + builder.bind_label(end_label); + return {}; + }, + + [&](const auto &value) -> result_t { + std::ignore = value; + return std::unexpected{error::other_compiler_error{ + "Not implemented" + }}; + } + }, node.data); + } + }; + + template + consteval static auto compile_nodes() { + constexpr static auto expected = [] constexpr { + return compiler{nodes, root}.compile(); + }; + + if constexpr (not expected()) { + report_error<[]{return expected.error();}>(); + } else { + return compilation_result_to_const<[] constexpr { return expected().value(); }>(); + } + } + + template + consteval static auto compile() { + constexpr static auto nodes_root = parse(); + return compile_nodes(); + } } // namespace korka \ No newline at end of file diff --git a/include/korka/compiler/error.hpp b/include/korka/compiler/error.hpp deleted file mode 100644 index 911f1ad..0000000 --- a/include/korka/compiler/error.hpp +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include -#include "lex_token.hpp" -#include "korka/utils/const_format.hpp" -#include - -namespace korka { - namespace error { - struct lexer_context { - std::size_t line; - }; - - struct unexpected_character { - lexer_context ctx; - char c; - }; - - constexpr auto report(const unexpected_character &err) -> std::string { - return format("Lexer Error: Unexpected character '~' at line ~", err.c, err.ctx.line); - } - - 
struct other_lexer_error { - lexer_context ctx; - std::string_view message; - }; - - constexpr auto report(const other_lexer_error &err) -> std::string { - return korka::format("Lexer Error: ~ at line ~", err.message, err.ctx.line); - } - - - struct parser_context { - std::optional lexeme; - }; - - struct other_parser_error { - parser_context ctx; - std::string_view message; - }; - - constexpr auto report(const other_parser_error &err) -> std::string { - if (err.ctx.lexeme) { - auto &l = err.ctx.lexeme; - return korka::format("Parser Error: ~ at ~:~ (token: ~)", err.message, l->line, l->char_pos, l->lexeme); - } - return korka::format("Parser Error: ~:~", err.message, err.ctx.lexeme->char_pos); - } - } - - using error_t = std::variant< - error::unexpected_character, - error::other_lexer_error, - error::other_parser_error>; - - constexpr auto to_string(const error_t &err) -> std::string { - return std::visit([](const auto &e) { - return error::report(e); - }, err); - } - - template - consteval auto report_error() -> void { - static_assert(false, to_string(err_getter())); - } -} \ No newline at end of file diff --git a/include/korka/compiler/lexer.hpp b/include/korka/compiler/lexer.hpp index 1a14054..d5dbb5b 100644 --- a/include/korka/compiler/lexer.hpp +++ b/include/korka/compiler/lexer.hpp @@ -13,7 +13,7 @@ #include "lex_token.hpp" #include "korka/shared.hpp" #include "korka/utils/utils.hpp" -#include "error.hpp" +#include "korka/shared/error.hpp" namespace korka { class lexer { @@ -271,18 +271,15 @@ namespace korka { template consteval auto lex() { - constexpr static auto expr = [] constexpr { + constexpr static auto expected = [] consteval { return lexer{static_cast(str)}.lex(); }; - if constexpr (expr()) { - constexpr static auto expr_getter = [] constexpr { - return expr().value(); - }; - return to_array(); + if constexpr (expected()) { + return to_array<[]{return expected().value();}>(); } else { - report_error<[] { return expr().error(); }>(); - return 
expr().error(); + report_error(); + return expected().error(); } } } // korka diff --git a/include/korka/compiler/parser.hpp b/include/korka/compiler/parser.hpp index 35b2f1f..e653edd 100644 --- a/include/korka/compiler/parser.hpp +++ b/include/korka/compiler/parser.hpp @@ -8,10 +8,10 @@ #include #include #include +#include namespace korka { - class parser { - public: + namespace nodes { using index_t = int32_t; constexpr static index_t empty_node = -1; @@ -40,6 +40,47 @@ namespace korka { index_t next = empty_node; }; + + struct index_iterator { + using value_type = index_t; + using difference_type = std::ptrdiff_t; + + index_t current; + std::span nodes; + + constexpr auto operator*() const -> index_t { return current; } + constexpr auto operator++() -> index_iterator& { + current = nodes[current].next; + return *this; + } + constexpr auto operator++(int) -> index_iterator { + auto self = *this; + current = nodes[current].next; + return self; + } + constexpr auto operator==(std::default_sentinel_t) const -> bool { + return current == empty_node; + } + }; + + constexpr auto get_list_view(std::span nodes, index_t head) { + return std::ranges::subrange( + index_iterator{head, nodes}, + std::default_sentinel + ); + } + } + using namespace korka::nodes; + + template + concept parser_mixin = requires (T &mixin) { + { mixin.on_function(std::declval()) }; + }; + + template + class parser { + public: + struct ast_pool { std::vector nodes{}; size_t count = 0; @@ -483,29 +524,22 @@ namespace korka { } }; - template - constexpr auto parse_tokens() { - constexpr static auto p = []constexpr{ + template + consteval auto parse_tokens() { + constexpr static auto expected = []constexpr{ return parser{std::span{tokens}}.parse(); }; - constexpr static auto pp = [] constexpr { - if constexpr (p()) { - return std::make_pair(to_array<[]{return p()->first;}>(), p()->second); - } else { - constexpr static auto get_error = [] constexpr { - return p().error(); - }; - report_error(); - 
return p().error(); - } - }; - - return pp(); + if constexpr(not expected()) { + report_error<[] {return expected().error();}>(); + return expected().error(); + } else { + return std::make_pair(to_array<[]{return expected()->first;}>(), expected()->second); + } } template - constexpr auto parse() { + consteval auto parse() { constexpr static auto tokens = lex(); return parse_tokens(); } diff --git a/include/korka/shared/error.hpp b/include/korka/shared/error.hpp new file mode 100644 index 0000000..1fdf92d --- /dev/null +++ b/include/korka/shared/error.hpp @@ -0,0 +1,117 @@ +#pragma once + +#include +#include "korka/compiler/lex_token.hpp" +#include "korka/utils/const_format.hpp" +#include + +namespace korka { + namespace error { + struct lexer_context { + std::size_t line; + }; + + struct unexpected_character { + lexer_context ctx; + char c; + }; + + constexpr auto report(const unexpected_character &err) -> std::string { + return format("Lexer Error: Unexpected character '~' at line ~", err.c, err.ctx.line); + } + + struct other_lexer_error { + lexer_context ctx; + std::string_view message; + }; + + constexpr auto report(const other_lexer_error &err) -> std::string { + return korka::format("Lexer Error: ~ at line ~", err.message, err.ctx.line); + } + + + struct parser_context { + std::optional lexeme; + }; + + struct other_parser_error { + parser_context ctx; + std::string_view message; + }; + + constexpr auto report(const other_parser_error &err) -> std::string { + if (err.ctx.lexeme) { + auto &l = err.ctx.lexeme; + return korka::format("Parser Error: ~ at ~:~ (token: ~)", err.message, l->line, l->char_pos, l->lexeme); + } + return korka::format("Parser Error: ~:~", err.message, err.ctx.lexeme->char_pos); + } + + struct redeclaration { + std::string_view identifier; + }; + constexpr auto report(const redeclaration &err) -> std::string { + return korka::format("Compiler Error: ~ was redeclared", err.identifier); + } + + struct unknown_type { + std::string_view 
identifier; + }; + constexpr auto report(const unknown_type &err) -> std::string { + return korka::format("Compiler Error: unknown type `~`", err.identifier); + } + + struct undefined_symbol { + std::string_view identifier; + }; + constexpr auto report(const undefined_symbol &err) -> std::string { + return korka::format("Compiler Error: symbol `~` not defined", err.identifier); + } + + struct function_return_type_mismatch { + std::string_view return_type; + std::string_view actual_type; + }; + constexpr auto report(const function_return_type_mismatch &err) -> std::string { + return korka::format("Compiler Error: expected ~ type to be returned, got ~", err.return_type, err.actual_type); + } + + + struct other_compiler_error { + std::string_view message; + }; + + constexpr auto report(const other_compiler_error &err) -> std::string { + return korka::format("Compiler Error: ~", err.message); + } + + struct other_error { + std::string_view message; + }; + + constexpr auto report(const other_error &err) -> std::string { + return korka::format("Error: ~", err.message); + } + } + + using error_t = std::variant< + error::unexpected_character, + error::other_lexer_error, + error::other_parser_error, + error::redeclaration, + error::undefined_symbol, + error::unknown_type, + error::other_compiler_error, + error::other_error>; + + constexpr auto to_string(const error_t &err) -> std::string { + return std::visit([](const auto &e) { + return error::report(e); + }, err); + } + + template + consteval auto report_error() -> void { + static_assert(false, to_string(err_getter())); + } +} \ No newline at end of file diff --git a/include/korka/shared/flat_map.hpp b/include/korka/shared/flat_map.hpp new file mode 100644 index 0000000..093f55f --- /dev/null +++ b/include/korka/shared/flat_map.hpp @@ -0,0 +1,98 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace korka { + template + class flat_map { + public: + using value_type = std::pair; + + constexpr 
flat_map() = default; + + constexpr explicit flat_map(std::vector data) + : m_data(std::move(data)) { + sort_data(); + } + + constexpr auto find(const Key &key) const { + auto it = lower_bound(key); + if (it != m_data.end() && it->first == key) { + return it; + } + return m_data.end(); + } + + constexpr auto operator[](const Key &key) -> Value & { + auto it = lower_bound(key); + + if (it == m_data.end() || it->first != key) { + it = m_data.insert(it, {key, Value{}}); + } + + return it->second; + } + + constexpr auto contains(const Key &key) const -> bool { + return find(key) != m_data.end(); + } + + constexpr auto insert(Key key, Value value) -> void { + auto it = lower_bound(key); + if (it != m_data.end() && it->first == key) { + it->second = std::move(value); + } else { + m_data.insert(it, {std::move(key), std::move(value)}); + } + } + + constexpr auto emplace(auto &&...args) -> void { + auto pair = value_type{std::forward(args)...}; + + auto it = lower_bound(pair.first); + if (it != m_data.end() && it->first == pair.first) { + it->second = std::move(pair.second); + } else { + m_data.insert(it, {std::move(pair.first), std::move(pair.second)}); + } + } + + constexpr auto begin() const { return m_data.begin(); } + + constexpr auto end() const { return m_data.end(); } + + constexpr auto size() const -> std::size_t { return m_data.size(); } + + constexpr auto clear() { + m_data.clear(); + } + + private: + std::vector m_data; + + constexpr auto lower_bound(const Key &key) const { + return std::lower_bound(m_data.begin(), m_data.end(), key, + [](const value_type &pair, const Key &k) { + return pair.first < k; + }); + } + + constexpr auto lower_bound(const Key &key) { + return std::lower_bound(m_data.begin(), m_data.end(), key, + [](const value_type &pair, const Key &k) { + return pair.first < k; + }); + } + + constexpr auto sort_data() -> void { + std::sort(m_data.begin(), m_data.end(), [](const auto &a, const auto &b) { + return a.first < b.first; + }); + } + }; +} \ 
No newline at end of file diff --git a/include/korka/shared/types.hpp b/include/korka/shared/types.hpp new file mode 100644 index 0000000..b6cdd65 --- /dev/null +++ b/include/korka/shared/types.hpp @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace korka { + enum class type { + void_, + i64 + }; + + namespace detail { + template + struct type_to_cpp_; + template<> + struct type_to_cpp_ { + using type = void; + }; + + template<> + struct type_to_cpp_ { + using type = std::int64_t; + }; + } + + template + using type_to_cpp_t = typename detail::type_to_cpp_::type; +} \ No newline at end of file diff --git a/include/korka/utils/frozen_hash_string_view.hpp b/include/korka/utils/frozen_hash_string_view.hpp new file mode 100644 index 0000000..d8d6fd9 --- /dev/null +++ b/include/korka/utils/frozen_hash_string_view.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace frozen { + template<> + struct elsa { + constexpr std::size_t operator()(std::string_view const &value, std::size_t seed) const { + auto hash = elsa{}; + + for (auto &&c : value) { + auto h = hash(c, seed); + seed ^= h; + seed += h; + } + + return seed; + } + }; +} diff --git a/include/korka/utils/overloaded.hpp b/include/korka/utils/overloaded.hpp new file mode 100644 index 0000000..a272a73 --- /dev/null +++ b/include/korka/utils/overloaded.hpp @@ -0,0 +1,10 @@ +#pragma once + +namespace korka { + + template + struct overloaded : Ts ... { + using Ts::operator()...; + }; + template overloaded(Ts...) 
-> overloaded; +} // namespace korka \ No newline at end of file diff --git a/include/korka/utils/string.hpp b/include/korka/utils/string.hpp index 9ceb03e..efae322 100644 --- a/include/korka/utils/string.hpp +++ b/include/korka/utils/string.hpp @@ -9,6 +9,8 @@ namespace korka { struct const_string { constexpr const_string() = default; + constexpr const_string(const const_string &) = default; + constexpr const_string(const char (&str)[N]) { std::copy_n(str, N, value); } @@ -19,8 +21,16 @@ namespace korka { char value[N]{}; const std::size_t length = N; + }; + template + constexpr auto const_string_from_string_view() { + const_string str; + std::copy_n(sv_getter().data(), str.length, str.value); + return str; + } + template concept StringLiteral = requires(std::decay_t t) { { t.value } -> std::convertible_to; diff --git a/include/korka/vm/bytecode_builder.hpp b/include/korka/vm/bytecode_builder.hpp index 2434a56..030a7b4 100644 --- a/include/korka/vm/bytecode_builder.hpp +++ b/include/korka/vm/bytecode_builder.hpp @@ -1,5 +1,6 @@ #include "korka/utils/byte_writer.hpp" #include "korka/utils/utils.hpp" +#include "korka/shared/types.hpp" #include "op_codes.hpp" #include "options.hpp" #include @@ -21,70 +22,67 @@ namespace korka::vm { return {next_label++}; } - constexpr auto bind(const label &l) -> auto { + /** + * Saves the label on the next byte + */ + constexpr auto bind_label(const label &l) -> auto { m_label_pos.emplace_back(l, m_data.data().size()); } - constexpr auto emit_load_imm(reg_id_t dst, stack_value_t imm) { - emit_op(op_code::load_imm); - m_data.write_many(dst, imm); - } - - constexpr auto emit_add(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::add); - m_data.write_many(dst, a, b); - } - - constexpr auto emit_sub(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::sub); - m_data.write_many(dst, a, b); + constexpr auto resolve_label(const label &label_) -> std::optional { + for (auto &&[l, i]: m_label_pos) { + if (l.id == label_.id) + 
return i; + } + return std::nullopt; } - constexpr auto emit_mul(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::mul); - m_data.write_many(dst, a, b); - } + // ops - constexpr auto emit_div(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::div); - m_data.write_many(dst, a, b); + constexpr auto emit_op(op_code code) -> std::size_t { + return m_last_op_pos = m_data.write(static_cast(code)); } - constexpr auto emit_cmp_lt(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_lt); - m_data.write_many(dst, a, b); + constexpr auto emit_load_local(local_index_t index) { + emit_op(op_code::lload); + m_data.write_many(index); } - constexpr auto emit_cmp_gt(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_gt); - m_data.write_many(dst, a, b); + constexpr auto emit_save_local(local_index_t index) { + emit_op(op_code::lsave); + m_data.write_many(index); } - constexpr auto emit_cmp_eq(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_eq); - m_data.write_many(dst, a, b); + template + constexpr auto emit_const(const type_to_cpp_t &value) { + emit_op(get_const_op_by_type()); + m_data.write_many(value); } // --- JUMPS --- constexpr auto emit_jmp(const label &target) { record_jump(op_code::jmp, target); } - - constexpr auto emit_jmp_if(const label &target, reg_id_t cond) { - record_jump(op_code::jmp_if, target, cond); + constexpr auto emit_jmp_if_zero(const label &target) { + record_jump(op_code::jmpz, target); } +// +// constexpr auto emit_jmp_if(const label &target, reg_id_t cond) { +// record_jump(op_code::jmp_if, target, cond); +// } constexpr auto build() -> std::vector { auto data = m_data.data(); for (auto &&j: m_jumps) { - auto label_pos = get_label_pos(j.target); + auto label_pos = resolve_label(j.target); if (not label_pos) { std::abort(); } int target_pc = *label_pos; - std::int64_t offset = target_pc - j.instr_index; + jump_offset offset = target_pc - j.instr_index; + std::ranges::copy( - 
std::as_bytes(std::span{&offset, 1}), + std::bit_cast>(offset), std::begin(data) + j.instr_index + op_code_size); } @@ -93,7 +91,7 @@ namespace korka::vm { private: struct pending_jump { - int instr_index; + jump_offset instr_index; label target; }; @@ -105,45 +103,31 @@ namespace korka::vm { std::vector m_jumps; std::vector> m_label_pos; - constexpr auto get_label_pos(const label &label_) -> std::optional { - for (auto &&[l, i]: m_label_pos) { - if (l.id == label_.id) - return i; - } - return std::nullopt; - } - - constexpr auto emit_op(op_code code) -> std::size_t { - return m_last_op_pos = m_data.write(static_cast(code)); - } - constexpr auto - record_jump(op_code op, const label &label_, std::optional condition = std::nullopt) -> void { + record_jump(op_code op, const label &label_) -> void { auto index = emit_op(op); - m_data.write_many(std::int64_t{}); - if (condition) - m_data.write_many(*condition); + m_data.write_many(jump_offset{}); m_jumps.emplace_back(index, label_); } }; namespace tests { - constexpr auto builder = []() constexpr { - constexpr static auto get_bytes = []() constexpr { - bytecode_builder b; - b.emit_add(0, 1, 2); - return b.build(); - }; - constexpr static auto bytes = to_array(); - return bytes; - }; - - constexpr auto bytes = builder(); - static_assert(bytes == std::array{ - static_cast(op_code::add), - static_cast(0), - static_cast(1), - static_cast(2) - }); +// constexpr auto builder = []() constexpr { +// constexpr static auto get_bytes = []() constexpr { +// bytecode_builder b; +// b.emit_add(0, 1, 2); +// return b.build(); +// }; +// constexpr static auto bytes = to_array(); +// return bytes; +// }; +// +// constexpr auto bytes = builder(); +// static_assert(bytes == std::array{ +// static_cast(op_code::add), +// static_cast(0), +// static_cast(1), +// static_cast(2) +// }); } } \ No newline at end of file diff --git a/include/korka/vm/op_codes.hpp b/include/korka/vm/op_codes.hpp index e7068c0..5e27851 100644 --- 
a/include/korka/vm/op_codes.hpp +++ b/include/korka/vm/op_codes.hpp @@ -1,25 +1,115 @@ #pragma once +#include +#include "korka/shared/types.hpp" + namespace korka::vm { + using local_index_t = std::uint8_t; + using jump_offset = std::int32_t; + enum class op_code { - load_imm, // rX = imm - load_arg, // rX = arg[i] + // --- Memory & Stack --- + // Loads a value from the local at index on stack + // + lload, + + // Load parameters from stack into locals + // + pload, - add, // rC = rA + rB - sub, // rC = rA - rB - mul, // rC = rA * rB - div, // rC = rA / rB + // Pops a value from stack and saves to the local at index + // + lsave, - cmp_eq, // rC = rA == rB - cmp_lt, // rC = rA < rB - cmp_gt, // rC = rA > rB + // Pushes a value onto the stack + i64_const, // - jmp, // pc += offset - jmp_if, // if (rX) pc += offset + // --- Math --- + // Order: + // A = pop() # first on stack + // B = pop() # second on stack + // C = B / A + // push(C) - call, + i64_add, + i64_sub, + i64_mul, + i64_div, + + // --- Control flow --- + // - Jumps - + // // + jmp, // jumps no matter what + jmpz, // pops value and jumps if it's zero + + // - Other - ret }; + template + constexpr op_code get_const_op_by_type() { + if constexpr (Type == korka::type::i64) { + return op_code::i64_const; + } else { + static_assert(false, "Unknown type"); + } + } + + using type_info = std::variant; + + constexpr auto + get_op_code_for_math(type_info ltype, type_info rtype, std::string_view op) -> std::expected { + if (ltype != rtype) { + return std::unexpected{error::other_error{ + .message = "Math operations between distinct types are not supported yet" + }}; + } + + return std::visit(overloaded{ + [&](korka::type type) -> std::expected { + if (type == korka::type::i64) { + if (op == "+") + return op_code::i64_add; + if (op == "-") + return op_code::i64_sub; + if (op == "*") + return op_code::i64_mul; + if (op == "/") + return op_code::i64_div; + return std::unexpected{error::other_error{ + .message = 
"Unsupported math operation for i64" + }}; + } + return std::unexpected{error::other_error{ + .message = "Unsupported type for math" + }}; + }, + [&](auto &&) -> std::expected { + + return std::unexpected{error::other_error{ + .message = "Unsupported type for math" + }}; + } + }, ltype); + } + constexpr int op_code_size = 1; + + template + constexpr auto _type_info_to_cpp() { + if constexpr (std::holds_alternative(getter())) { + constexpr static korka::type t = [] { return std::get(getter()); }(); + using type = type_to_cpp_t; + if constexpr (std::is_void_v) { + return; + } else { + return std::decay_t{}; + } + } else { + static_assert(false, "Unsupported type"); + } + } + + template + using type_info_to_cpp_t = decltype(_type_info_to_cpp()); } \ No newline at end of file diff --git a/include/korka/vm/vm_runtime.hpp b/include/korka/vm/vm_runtime.hpp index 53e53b7..0a53c84 100644 --- a/include/korka/vm/vm_runtime.hpp +++ b/include/korka/vm/vm_runtime.hpp @@ -6,8 +6,6 @@ namespace korka { - class vm_runtime { - }; } // korka \ No newline at end of file diff --git a/main.cpp b/main.cpp index 8d47ce0..4aeaf21 100644 --- a/main.cpp +++ b/main.cpp @@ -1,26 +1,56 @@ #include "korka/compiler/parser.hpp" +#include "korka/compiler/compiler.hpp" #include "korka/compiler/ast_walker.hpp" #include constexpr char code[] = R"( int main() { - int i = 0; - while(i + foo(i) <= 4500) { - print("Abc"); - - i = 5 + 5 + foo("42 + 55", 2); + int a = 2; + if (a) { + return a; + } else { + return 5 + a; } - return 0; +} + +int foo(int a, int b) { + return a + b; } )"; -//constexpr static auto tokens = korka::lex(); +constexpr auto compile_result = korka::compile(); -constexpr auto ast = korka::parse(); +auto main_func = compile_result.function<"main">(); +static_assert(std::is_same_v); -constexpr auto node_pool = ast.first; -constexpr auto node_root = ast.second; +auto foo_func = compile_result.function<"foo">(); +static_assert(std::is_same_v); int main() { - std::println("{}", 
korka::ast_walker{node_pool, node_root, 0}); +// std::ignore = tokens; +// std::println("{:n:02X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); + +// auto lexed = korka::lexer{code}.lex(); +// if (not lexed) { +// std::println("{}", korka::to_string(lexed.error())); +// return 0; +// } +// +// auto parsed = korka::parser{lexed.value()}.parse(); +// if (not parsed) { +// std::println("{}", korka::to_string(parsed.error())); +// return 0; +// } +// auto [node_pool, node_root] = parsed.value(); +// std::println("{}", korka::ast_walker{node_pool, node_root, 0}); +// +// korka::compiler compiler{node_pool, node_root}; +// auto bytes = compiler.compile(); +// +// if (bytes) { +// std::println("{::X}", *bytes | std::views::transform([](auto b) { return static_cast(b); })); +// } else { +// std::println("{}", korka::to_string(bytes.error())); +// } + } \ No newline at end of file diff --git a/test/bytecode_builder.cpp b/test/bytecode_builder.cpp index 76a785c..3961b47 100644 --- a/test/bytecode_builder.cpp +++ b/test/bytecode_builder.cpp @@ -1,4 +1,4 @@ -#include +#include #include "korka/vm/bytecode_builder.hpp" #include "korka/utils/byte_writer.hpp" #include "korka/vm/op_codes.hpp" diff --git a/test/lexer.cpp b/test/lexer.cpp index 7955ef4..3f427d6 100644 --- a/test/lexer.cpp +++ b/test/lexer.cpp @@ -1,5 +1,5 @@ +#include #include "korka/compiler/lexer.hpp" -#include TEST_CASE("lex_token: Equality operator", "[lexer][unit]") { using namespace korka; diff --git a/test/parser.cpp b/test/parser.cpp index ed3b21d..1d06445 100644 --- a/test/parser.cpp +++ b/test/parser.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include "korka/compiler/lexer.hpp" #include "korka/compiler/parser.hpp" #include @@ -12,7 +13,7 @@ auto StrContains(const std::string &str) { return Catch::Matchers::StringContainsMatcher({str, Catch::CaseSensitive::No}); }; -static auto parse_code(std::string_view code) -> std::pair, parser::index_t> { +static auto 
parse_code(std::string_view code) -> std::pair, korka::nodes::index_t> { auto tokens = lexer{code}.lex(); REQUIRE(tokens); parser p(tokens.value());