From 7f7f42446895792c34db8cdc0dc8a97d92ce0558 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Tue, 3 Mar 2026 18:56:27 +0300 Subject: [PATCH 1/6] Init commit --- CMakeLists.txt | 39 +-- include/korka/compiler/ast_walker.hpp | 52 ++-- include/korka/compiler/compiler.hpp | 271 +++++++++++++++++++ include/korka/compiler/lexer.hpp | 2 +- include/korka/compiler/parser.hpp | 46 +++- include/korka/{compiler => shared}/error.hpp | 46 +++- include/korka/shared/flat_map.hpp | 89 ++++++ include/korka/shared/types.hpp | 26 ++ include/korka/utils/overloaded.hpp | 10 + include/korka/vm/bytecode_builder.hpp | 114 +++----- include/korka/vm/op_codes.hpp | 29 +- main.cpp | 38 ++- test/bytecode_builder.cpp | 2 +- test/lexer.cpp | 2 +- test/parser.cpp | 5 +- 15 files changed, 620 insertions(+), 151 deletions(-) rename include/korka/{compiler => shared}/error.hpp (53%) create mode 100644 include/korka/shared/flat_map.hpp create mode 100644 include/korka/shared/types.hpp create mode 100644 include/korka/utils/overloaded.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a18693..6dcab2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,10 +38,13 @@ add_library(korka_lib include/korka/compiler/parser.hpp include/korka/shared.hpp include/korka/compiler/ast_walker.hpp - include/korka/compiler/error.hpp + include/korka/shared/error.hpp include/korka/compiler/lex_token.hpp include/korka/utils/const_format.hpp include/korka/compiler/compiler.hpp + include/korka/utils/overloaded.hpp + include/korka/shared/types.hpp + include/korka/shared/flat_map.hpp ) target_include_directories(korka_lib @@ -57,20 +60,20 @@ add_executable(pxkorka main.cpp) target_link_libraries(pxkorka PRIVATE korka_lib) # --- TESTS --- -if (ENABLE_TESTS) - enable_testing() - - add_executable(pxkorka_tests - test/lexer.cpp - test/bytecode_builder.cpp - test/parser.cpp - ) - - target_link_libraries(pxkorka_tests - PRIVATE - korka_lib - Catch2WithMain - ) - - catch_discover_tests(pxkorka_tests) -endif () \ No newline at 
end of file +#if (ENABLE_TESTS) +# enable_testing() +# +# add_executable(pxkorka_tests +# test/lexer.cpp +# test/bytecode_builder.cpp +# test/parser.cpp +# ) +# +# target_link_libraries(pxkorka_tests +# PRIVATE +# korka_lib +# Catch2WithMain +# ) +# +# catch_discover_tests(pxkorka_tests) +#endif () \ No newline at end of file diff --git a/include/korka/compiler/ast_walker.hpp b/include/korka/compiler/ast_walker.hpp index b91ab5c..452afe0 100644 --- a/include/korka/compiler/ast_walker.hpp +++ b/include/korka/compiler/ast_walker.hpp @@ -1,15 +1,13 @@ #pragma once #include "parser.hpp" +#include "korka/utils/overloaded.hpp" #include #include namespace korka { - template struct overloaded : Ts... { using Ts::operator()...; }; - template overloaded(Ts...) -> overloaded; - struct ast_walker { - const std::span &pool; - parser::index_t index; + const std::span &pool; + nodes::index_t index; int indent = 0; }; } @@ -21,7 +19,7 @@ struct std::formatter { auto format(const korka::ast_walker& w, std::format_context& ctx) const { auto out = ctx.out(); - if (w.index == korka::parser::empty_node) { + if (w.index == korka::nodes::empty_node) { return std::format_to(out, "{}", std::string(w.indent * 2, ' ')); } @@ -32,7 +30,7 @@ struct std::formatter { const auto& node = w.pool[w.index]; std::string spaces(w.indent * 2, ' '); - auto fmt_child = [&](std::string_view label, korka::parser::index_t child_idx) { + auto fmt_child = [&](std::string_view label, korka::nodes::index_t child_idx) { std::format_to(out, "\n{}{}:", spaces, label); std::format_to(out, "\n{}", korka::ast_walker{w.pool, child_idx, w.indent + 1}); }; @@ -41,7 +39,7 @@ struct std::formatter { using namespace korka; std::visit(overloaded{ - [&](const parser::expr_literal& lit) { + [&](const nodes::expr_literal& lit) { std::visit(overloaded{ [&](std::monostate) { out = std::format_to(out, "null"); @@ -51,70 +49,70 @@ struct std::formatter { } }, lit); }, - [&](const parser::expr_var& v) { + [&](const nodes::expr_var& 
v) { out = std::format_to(out, "Var '{}'", v.name); }, - [&](const parser::expr_unary& v) { + [&](const nodes::expr_unary& v) { out = std::format_to(out, "Unary '{}'", v.op); fmt_child("child", v.child); }, - [&](const parser::expr_binary& v) { + [&](const nodes::expr_binary& v) { out = std::format_to(out, "Binary '{}'", v.op); fmt_child("L", v.left); fmt_child("R", v.right); }, - [&](const parser::expr_call& v) { + [&](const nodes::expr_call& v) { out = std::format_to(out, "Call '{}'", v.name); - if (v.args_head != parser::empty_node) { + if (v.args_head != nodes::empty_node) { fmt_child("args", v.args_head); } }, - [&](const parser::stmt_block& v) { + [&](const nodes::stmt_block& v) { out = std::format_to(out, "Block"); - if (v.children_head != parser::empty_node) { + if (v.children_head != nodes::empty_node) { fmt_child("body", v.children_head); } }, - [&](const parser::stmt_if& v) { + [&](const nodes::stmt_if& v) { out = std::format_to(out, "If"); fmt_child("cond", v.condition); fmt_child("then", v.then_branch); - if (v.else_branch != parser::empty_node) { + if (v.else_branch != nodes::empty_node) { fmt_child("else", v.else_branch); } }, - [&](const parser::stmt_while& v) { + [&](const nodes::stmt_while& v) { out = std::format_to(out, "While"); fmt_child("cond", v.condition); fmt_child("body", v.body); }, - [&](const parser::stmt_return& v) { + [&](const nodes::stmt_return& v) { out = std::format_to(out, "Return"); - if (v.expr != parser::empty_node) fmt_child("val", v.expr); + if (v.expr != nodes::empty_node) fmt_child("val", v.expr); }, - [&](const parser::stmt_expr& v) { + [&](const nodes::stmt_expr& v) { out = std::format_to(out, "ExprStmt"); fmt_child("expr", v.expr); }, - [&](const parser::decl_var& v) { + [&](const nodes::decl_var& v) { out = std::format_to(out, "DeclVar '{} {}'", v.type_name, v.var_name); - if (v.init_expr != parser::empty_node) { + if (v.init_expr != nodes::empty_node) { fmt_child("init", v.init_expr); } }, - [&](const 
parser::decl_function& v) { + [&](const nodes::decl_function& v) { out = std::format_to(out, "Function '{} {}'", v.ret_type, v.name); - if (v.params_head != parser::empty_node) fmt_child("params", v.params_head); + if (v.params_head != nodes::empty_node) fmt_child("params", v.params_head); fmt_child("body", v.body); }, - [&](const parser::decl_program& v) { + [&](const nodes::decl_program& v) { out = std::format_to(out, "Program"); fmt_child("roots", v.external_declarations_head); } }, node.data); - if (node.next != parser::empty_node) { + if (node.next != nodes::empty_node) { std::format_to(out, "\n{}", korka::ast_walker{w.pool, node.next, w.indent}); } diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index 218edad..f7b106c 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -1,5 +1,276 @@ #pragma once +#include "korka/shared/error.hpp" +#include "korka/shared/flat_map.hpp" +#include "korka/utils/overloaded.hpp" +#include "parser.hpp" +#include "korka/vm/bytecode_builder.hpp" +#include +#include +#include +#include +#include + namespace korka { + struct void_t { + }; + + using type_info = std::variant; + + constexpr auto string_to_type(std::string_view name) -> type { + if (name == "int") return type::i64; + else if (name == "void") return type::void_; + // TODO: other types + return type::i64; + } + + constexpr auto type_to_string(type t) -> std::string_view { + switch (t) { + case type::void_: + return "void"; + case type::i64: + return "int"; + } + } + + struct variable_info { + std::string_view name; + type_info type; + // bp + stack_offset + std::size_t stack_offset; + + static constexpr auto from_node(const nodes::decl_var &node) -> variable_info { + return { + .name = node.var_name, + .type{}, + .stack_offset{} + }; + } + }; + + struct function_info { + std::string_view name; + std::vector params; + type_info return_type; + + vm::bytecode_builder::label label; + }; + + struct 
symbol_table { + struct scope { + flat_map variables; + std::size_t current_stack_frame_size = 0; + }; + std::vector scopes; + flat_map functions; + + constexpr auto push_scope() -> void { scopes.emplace_back(); } + + constexpr auto pop_scope() -> void { scopes.pop_back(); } + + constexpr auto declare_var(std::string_view name, const type_info &type) -> std::expected { + if (scopes.empty()) + return std::unexpected{error::other_compiler_error{ + .message = "No scope" + }}; + + auto ¤t = scopes.back(); + if (current.variables.contains(name)) + return std::unexpected{error::redeclaration{ + .identifier = name + }}; + + variable_info info{ + .name = name, + .type = type, + .stack_offset = current.current_stack_frame_size + }; + + // For now, assume every variable (i64) is 8 bytes + current.current_stack_frame_size += 8; + current.variables[name] = info; + return info; + } + + constexpr auto declare_function(std::string_view name, auto &&...args) -> std::expected { + functions.emplace(std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple(name, std::forward(args)...)); + return {}; + } + + constexpr auto lookup_variable(std::string_view name) -> std::optional { + for (auto &scope: std::ranges::reverse_view(scopes)) { + if (auto var_it = scope.variables.find(name); var_it != std::end(scope.variables)) { + return var_it->second; + } + } + return std::nullopt; + } + + constexpr auto lookup_function(std::string_view name) -> std::optional { + if (auto func_it = functions.find(name); func_it != std::end(functions)) { + return func_it->second; + } + return std::nullopt; + } + + constexpr auto clear() -> void { + scopes.clear(); + functions.clear(); + } + }; + + struct static_symbol_table { + + template + static constexpr auto make_from_table() -> static_symbol_table { + constexpr static symbol_table table = symbol_table_getter(); + } + }; + + class compiler { + public: + constexpr compiler(std::span nodes, nodes::index_t root_node) + : 
m_nodes(nodes), m_root_node(root_node) {} + + constexpr auto compile() -> std::expected, error_t> { + m_symbols.push_scope(); + auto ok = process_node(m_root_node); + if (!ok) return std::unexpected{ok.error()}; + + return builder.build(); + } + + private: + std::span m_nodes; + nodes::index_t m_root_node; + symbol_table m_symbols; + vm::bytecode_builder builder; + + // Info for ast walker + std::optional m_current_func_ret; + + using result_t = std::expected; + + constexpr auto process_node(nodes::index_t idx) -> result_t { + const auto &node = m_nodes[idx]; + + return std::visit(overloaded{ + [&](const nodes::decl_program &program) -> result_t { + for (auto item: nodes::get_list_view(m_nodes, program.external_declarations_head)) { + auto ok = process_node(item); + if (not ok) { + return std::unexpected{ok.error()}; + } + } + + return type_info{type::void_}; + }, + [&](const nodes::decl_function &function) -> result_t { + type_info ret_type = string_to_type(function.ret_type); + m_current_func_ret = ret_type; + + // Function pointer + auto label = builder.make_label(); + + // Collect parameter info for the function signature + std::vector parameters; + for (auto p_idx: nodes::get_list_view(m_nodes, function.params_head)) { + const auto &p_node = std::get(m_nodes[p_idx].data); + parameters.push_back({ + .name = p_node.var_name, + .type = string_to_type(p_node.type_name), + .stack_offset = 0 // Will be calculated inside the scope + }); + } + + // Register the function globally BEFORE processing the body + // This allows the function to "see" itself (recursion) + auto reg_ok = m_symbols.declare_function( + function.name, + parameters, + ret_type, + label + ); + if (not reg_ok) return std::unexpected{reg_ok.error()}; + + // Entering function scope + m_symbols.push_scope(); + m_current_func_ret = ret_type; + builder.bind_label(label); + + // Handle parameters as local variables + for (auto &¶m: parameters) { + m_symbols.declare_var(param.name, param.type); + } + + // 
Function body + for (auto stmt: nodes::get_list_view(m_nodes, function.body)) { + if (auto res = process_node(stmt); !res) { + m_symbols.pop_scope(); // clean up + return res; + } + } + + // cleanup + m_symbols.pop_scope(); + m_current_func_ret.reset(); + + return {}; + }, + + [&](const nodes::expr_literal &lit) -> result_t { + return std::visit([&](auto val) -> result_t { + using T = decltype(val); + if constexpr (std::is_same_v) { + builder.emit_const(val); + return type_info{type::i64}; + } + return std::unexpected{ + error::other_compiler_error{.message = "This type is not supported as a literal yet"}}; + }, lit); + }, + [&](const nodes::stmt_block &block) -> result_t { + for (auto stmt: nodes::get_list_view(m_nodes, block.children_head)) { + if (auto res = process_node(stmt); !res) return res; + } + return {}; + }, + + [&](const nodes::stmt_return &stmt) -> result_t { + auto actual_type = process_node(stmt.expr); + if (!actual_type) return actual_type; + + // Semantic Check: Does return type match function signature? 
+ if (m_current_func_ret && *actual_type != *m_current_func_ret) { + // TODO: proper error + return std::unexpected{error::other_compiler_error{ + .message = "Function return type mismatch" + }}; + } + + builder.emit_op(vm::op_code::ret); + return *actual_type; + }, + + [&](const nodes::expr_var &var) -> result_t { + auto info = m_symbols.lookup_variable(var.name); + if (!info) + return std::unexpected{error::undefined_symbol{ + .identifier = var.name + }}; + // TODO: emit_load_local(info->stack_offset) + return info->type; + }, + [&](const auto &value) -> result_t { + std::ignore = value; + return std::unexpected{error::other_compiler_error{ + "Not implemented" + }}; + } + }, node.data); + } + }; } // namespace korka \ No newline at end of file diff --git a/include/korka/compiler/lexer.hpp b/include/korka/compiler/lexer.hpp index 1a14054..8a06de3 100644 --- a/include/korka/compiler/lexer.hpp +++ b/include/korka/compiler/lexer.hpp @@ -13,7 +13,7 @@ #include "lex_token.hpp" #include "korka/shared.hpp" #include "korka/utils/utils.hpp" -#include "error.hpp" +#include "korka/shared/error.hpp" namespace korka { class lexer { diff --git a/include/korka/compiler/parser.hpp b/include/korka/compiler/parser.hpp index 35b2f1f..2043325 100644 --- a/include/korka/compiler/parser.hpp +++ b/include/korka/compiler/parser.hpp @@ -8,10 +8,10 @@ #include #include #include +#include namespace korka { - class parser { - public: + namespace nodes { using index_t = int32_t; constexpr static index_t empty_node = -1; @@ -40,6 +40,48 @@ namespace korka { index_t next = empty_node; }; + + struct index_iterator { + using value_type = index_t; + using difference_type = std::ptrdiff_t; + + index_t current; + std::span nodes; + + auto operator*() const -> index_t { return current; } + auto operator++() -> index_iterator& { + current = nodes[current].next; + return *this; + } + auto operator++(int) -> index_iterator { + auto self = *this; + current = nodes[current].next; + return self; + } + auto 
operator==(std::default_sentinel_t) const -> bool { + return current == empty_node; + } + }; + + auto get_list_view(std::span nodes, index_t head) { + + return std::ranges::subrange( + index_iterator{head, nodes}, + std::default_sentinel + ); + } + } + using namespace korka::nodes; + + template + concept parser_mixin = requires (T &mixin) { + { mixin.on_function(std::declval()) }; + }; + + template + class parser { + public: + struct ast_pool { std::vector nodes{}; size_t count = 0; diff --git a/include/korka/compiler/error.hpp b/include/korka/shared/error.hpp similarity index 53% rename from include/korka/compiler/error.hpp rename to include/korka/shared/error.hpp index 911f1ad..355aba3 100644 --- a/include/korka/compiler/error.hpp +++ b/include/korka/shared/error.hpp @@ -1,7 +1,7 @@ #pragma once #include -#include "lex_token.hpp" +#include "korka/compiler/lex_token.hpp" #include "korka/utils/const_format.hpp" #include @@ -46,12 +46,54 @@ namespace korka { } return korka::format("Parser Error: ~:~", err.message, err.ctx.lexeme->char_pos); } + + struct redeclaration { + std::string_view identifier; + }; + constexpr auto report(const redeclaration &err) -> std::string { + return korka::format("Compiler Error: ~ was redeclared", err.identifier); + } + + struct unknown_type { + std::string_view identifier; + }; + constexpr auto report(const unknown_type &err) -> std::string { + return korka::format("Compiler Error: unknown type `~`", err.identifier); + } + + struct undefined_symbol { + std::string_view identifier; + }; + constexpr auto report(const undefined_symbol &err) -> std::string { + return korka::format("Compiler Error: symbol `~` not defined", err.identifier); + } + + struct function_return_type_mismatch { + std::string_view return_type; + std::string_view actual_type; + }; + constexpr auto report(const function_return_type_mismatch &err) -> std::string { + return korka::format("Compiler Error: expected ~ type to be returned, got ~", err.return_type, 
err.actual_type); + } + + + struct other_compiler_error { + std::string_view message; + }; + + constexpr auto report(const other_compiler_error &err) -> std::string { + return korka::format("Compiler Error: ~", err.message); + } } using error_t = std::variant< error::unexpected_character, error::other_lexer_error, - error::other_parser_error>; + error::other_parser_error, + error::redeclaration, + error::undefined_symbol, + error::unknown_type, + error::other_compiler_error>; constexpr auto to_string(const error_t &err) -> std::string { return std::visit([](const auto &e) { diff --git a/include/korka/shared/flat_map.hpp b/include/korka/shared/flat_map.hpp new file mode 100644 index 0000000..f994330 --- /dev/null +++ b/include/korka/shared/flat_map.hpp @@ -0,0 +1,89 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace korka { + template + class flat_map { + public: + using value_type = std::pair; + + constexpr flat_map() = default; + + constexpr explicit flat_map(std::vector data) + : m_data(std::move(data)) { + sort_data(); + } + + constexpr auto find(const Key& key) const { + auto it = lower_bound(key); + if (it != m_data.end() && it->first == key) { + return it; + } + return m_data.end(); + } + + constexpr auto operator[](const Key &key) const -> decltype(auto) { + auto it = find(key); + if (it == m_data.end()) { + it = m_data. 
+ } + return *it; + } + + constexpr auto contains(const Key& key) const -> bool { + return find(key) != m_data.end(); + } + + constexpr auto insert(Key key, Value value) -> void { + auto it = lower_bound(key); + if (it != m_data.end() && it->first == key) { + it->second = std::move(value); + } else { + m_data.insert(it, {std::move(key), std::move(value)}); + } + } + + constexpr auto emplace(auto &&...args) -> void { + auto pair = value_type{std::forward(args)...}; + + auto it = lower_bound(pair.first); + if (it != m_data.end() && it->first == pair.first) { + it->second = std::move(pair.second); + } else { + m_data.insert(it, {std::move(pair.first), std::move(pair.second)}); + } + } + + constexpr auto begin() const { return m_data.begin(); } + constexpr auto end() const { return m_data.end(); } + constexpr auto size() const -> std::size_t { return m_data.size(); } + + private: + std::vector m_data; + + constexpr auto lower_bound(const Key& key) const { + return std::lower_bound(m_data.begin(), m_data.end(), key, + [](const value_type& pair, const Key& k) { + return pair.first < k; + }); + } + + constexpr auto lower_bound(const Key& key) { + return std::lower_bound(m_data.begin(), m_data.end(), key, + [](const value_type& pair, const Key& k) { + return pair.first < k; + }); + } + + constexpr auto sort_data() -> void { + std::sort(m_data.begin(), m_data.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + } + }; +} \ No newline at end of file diff --git a/include/korka/shared/types.hpp b/include/korka/shared/types.hpp new file mode 100644 index 0000000..b6cdd65 --- /dev/null +++ b/include/korka/shared/types.hpp @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace korka { + enum class type { + void_, + i64 + }; + + namespace detail { + template + struct type_to_cpp_; + template<> + struct type_to_cpp_ { + using type = void; + }; + + template<> + struct type_to_cpp_ { + using type = std::int64_t; + }; + } + + template + using type_to_cpp_t = 
typename detail::type_to_cpp_::type; +} \ No newline at end of file diff --git a/include/korka/utils/overloaded.hpp b/include/korka/utils/overloaded.hpp new file mode 100644 index 0000000..a272a73 --- /dev/null +++ b/include/korka/utils/overloaded.hpp @@ -0,0 +1,10 @@ +#pragma once + +namespace korka { + + template + struct overloaded : Ts ... { + using Ts::operator()...; + }; + template overloaded(Ts...) -> overloaded; +} // namespace korka \ No newline at end of file diff --git a/include/korka/vm/bytecode_builder.hpp b/include/korka/vm/bytecode_builder.hpp index 2434a56..6850906 100644 --- a/include/korka/vm/bytecode_builder.hpp +++ b/include/korka/vm/bytecode_builder.hpp @@ -1,5 +1,6 @@ #include "korka/utils/byte_writer.hpp" #include "korka/utils/utils.hpp" +#include "korka/shared/types.hpp" #include "op_codes.hpp" #include "options.hpp" #include @@ -21,63 +22,46 @@ namespace korka::vm { return {next_label++}; } - constexpr auto bind(const label &l) -> auto { + /** + * Saves the label on the next byte + */ + constexpr auto bind_label(const label &l) -> auto { m_label_pos.emplace_back(l, m_data.data().size()); } - constexpr auto emit_load_imm(reg_id_t dst, stack_value_t imm) { - emit_op(op_code::load_imm); - m_data.write_many(dst, imm); - } - - constexpr auto emit_add(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::add); - m_data.write_many(dst, a, b); - } - - constexpr auto emit_sub(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::sub); - m_data.write_many(dst, a, b); - } - - constexpr auto emit_mul(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::mul); - m_data.write_many(dst, a, b); - } - - constexpr auto emit_div(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::div); - m_data.write_many(dst, a, b); + constexpr auto resolve_label(const label &label_) -> std::optional { + for (auto &&[l, i]: m_label_pos) { + if (l.id == label_.id) + return i; + } + return std::nullopt; } - constexpr auto emit_cmp_lt(reg_id_t dst, 
reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_lt); - m_data.write_many(dst, a, b); - } + // ops - constexpr auto emit_cmp_gt(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_gt); - m_data.write_many(dst, a, b); + constexpr auto emit_op(op_code code) -> std::size_t { + return m_last_op_pos = m_data.write(static_cast(code)); } - constexpr auto emit_cmp_eq(reg_id_t dst, reg_id_t a, reg_id_t b) { - emit_op(op_code::cmp_eq); - m_data.write_many(dst, a, b); + template + constexpr auto emit_const(const type_to_cpp_t &value) { + emit_op(get_const_op_by_type()); + m_data.write_many(value); } // --- JUMPS --- - constexpr auto emit_jmp(const label &target) { - record_jump(op_code::jmp, target); - } - - constexpr auto emit_jmp_if(const label &target, reg_id_t cond) { - record_jump(op_code::jmp_if, target, cond); - } +// constexpr auto emit_jmp(const label &target) { +// record_jump(op_code::jmp, target); +// } +// +// constexpr auto emit_jmp_if(const label &target, reg_id_t cond) { +// record_jump(op_code::jmp_if, target, cond); +// } constexpr auto build() -> std::vector { auto data = m_data.data(); for (auto &&j: m_jumps) { - auto label_pos = get_label_pos(j.target); + auto label_pos = resolve_label(j.target); if (not label_pos) { std::abort(); } @@ -105,18 +89,6 @@ namespace korka::vm { std::vector m_jumps; std::vector> m_label_pos; - constexpr auto get_label_pos(const label &label_) -> std::optional { - for (auto &&[l, i]: m_label_pos) { - if (l.id == label_.id) - return i; - } - return std::nullopt; - } - - constexpr auto emit_op(op_code code) -> std::size_t { - return m_last_op_pos = m_data.write(static_cast(code)); - } - constexpr auto record_jump(op_code op, const label &label_, std::optional condition = std::nullopt) -> void { auto index = emit_op(op); @@ -128,22 +100,22 @@ namespace korka::vm { }; namespace tests { - constexpr auto builder = []() constexpr { - constexpr static auto get_bytes = []() constexpr { - bytecode_builder b; - b.emit_add(0, 1, 
2); - return b.build(); - }; - constexpr static auto bytes = to_array(); - return bytes; - }; - - constexpr auto bytes = builder(); - static_assert(bytes == std::array{ - static_cast(op_code::add), - static_cast(0), - static_cast(1), - static_cast(2) - }); +// constexpr auto builder = []() constexpr { +// constexpr static auto get_bytes = []() constexpr { +// bytecode_builder b; +// b.emit_add(0, 1, 2); +// return b.build(); +// }; +// constexpr static auto bytes = to_array(); +// return bytes; +// }; +// +// constexpr auto bytes = builder(); +// static_assert(bytes == std::array{ +// static_cast(op_code::add), +// static_cast(0), +// static_cast(1), +// static_cast(2) +// }); } } \ No newline at end of file diff --git a/include/korka/vm/op_codes.hpp b/include/korka/vm/op_codes.hpp index e7068c0..e0b5337 100644 --- a/include/korka/vm/op_codes.hpp +++ b/include/korka/vm/op_codes.hpp @@ -1,25 +1,24 @@ #pragma once +#include +#include "korka/shared/types.hpp" + namespace korka::vm { enum class op_code { - load_imm, // rX = imm - load_arg, // rX = arg[i] - - add, // rC = rA + rB - sub, // rC = rA - rB - mul, // rC = rA * rB - div, // rC = rA / rB - - cmp_eq, // rC = rA == rB - cmp_lt, // rC = rA < rB - cmp_gt, // rC = rA > rB + // Pushes a value onto the stack + i64_const, // - jmp, // pc += offset - jmp_if, // if (rX) pc += offset - - call, ret }; + template + constexpr op_code get_const_op_by_type() { + if constexpr (Type == korka::type::i64) { + return op_code::i64_const; + } else { + static_assert(false, "Unknown type"); + } + } + constexpr int op_code_size = 1; } \ No newline at end of file diff --git a/main.cpp b/main.cpp index 8d47ce0..df0e9dd 100644 --- a/main.cpp +++ b/main.cpp @@ -1,26 +1,42 @@ #include "korka/compiler/parser.hpp" +#include "korka/compiler/compiler.hpp" #include "korka/compiler/ast_walker.hpp" #include constexpr char code[] = R"( int main() { - int i = 0; - while(i + foo(i) <= 4500) { - print("Abc"); - - i = 5 + 5 + foo("42 + 55", 2); - } - 
return 0; + return 5; } )"; //constexpr static auto tokens = korka::lex(); -constexpr auto ast = korka::parse(); - -constexpr auto node_pool = ast.first; -constexpr auto node_root = ast.second; +//constexpr auto ast = korka::parse(); +// +//constexpr auto node_pool = ast.first; +//constexpr auto node_root = ast.second; int main() { + auto lexed = korka::lexer{code}.lex(); + if (not lexed) { + std::println("{}", korka::to_string(lexed.error())); + return 0; + } + + auto parsed = korka::parser{lexed.value()}.parse(); + if (not parsed) { + std::println("{}", korka::to_string(parsed.error())); + return 0; + } + auto [node_pool, node_root] = parsed.value(); std::println("{}", korka::ast_walker{node_pool, node_root, 0}); + + korka::compiler compiler{node_pool, node_root}; + auto bytes = compiler.compile(); + + if (bytes) { + std::println("{}", *bytes | std::views::transform([](auto b) { return static_cast(b); })); + } else { + std::println("{}", korka::to_string(bytes.error())); + } } \ No newline at end of file diff --git a/test/bytecode_builder.cpp b/test/bytecode_builder.cpp index 76a785c..3961b47 100644 --- a/test/bytecode_builder.cpp +++ b/test/bytecode_builder.cpp @@ -1,4 +1,4 @@ -#include +#include #include "korka/vm/bytecode_builder.hpp" #include "korka/utils/byte_writer.hpp" #include "korka/vm/op_codes.hpp" diff --git a/test/lexer.cpp b/test/lexer.cpp index 7955ef4..3f427d6 100644 --- a/test/lexer.cpp +++ b/test/lexer.cpp @@ -1,5 +1,5 @@ +#include #include "korka/compiler/lexer.hpp" -#include TEST_CASE("lex_token: Equality operator", "[lexer][unit]") { using namespace korka; diff --git a/test/parser.cpp b/test/parser.cpp index ed3b21d..1d06445 100644 --- a/test/parser.cpp +++ b/test/parser.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include "korka/compiler/lexer.hpp" #include "korka/compiler/parser.hpp" #include @@ -12,7 +13,7 @@ auto StrContains(const std::string &str) { return Catch::Matchers::StringContainsMatcher({str, Catch::CaseSensitive::No}); }; 
-static auto parse_code(std::string_view code) -> std::pair, parser::index_t> { +static auto parse_code(std::string_view code) -> std::pair, korka::nodes::index_t> { auto tokens = lexer{code}.lex(); REQUIRE(tokens); parser p(tokens.value()); From e9a7d1cc4c50d60e64de9f16bdc9136cf3eb2208 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Sat, 7 Mar 2026 20:32:29 +0300 Subject: [PATCH 2/6] If branches & mapping types from inside --- CMakeLists.txt | 1 + include/korka/compiler/compiler.hpp | 264 ++++++++++++++++-- include/korka/compiler/lexer.hpp | 13 +- include/korka/compiler/parser.hpp | 34 +-- include/korka/shared/error.hpp | 11 +- include/korka/shared/flat_map.hpp | 33 ++- .../korka/utils/frozen_hash_string_view.hpp | 21 ++ include/korka/utils/string.hpp | 10 + include/korka/vm/bytecode_builder.hpp | 32 ++- include/korka/vm/op_codes.hpp | 89 +++++- main.cpp | 65 +++-- 11 files changed, 474 insertions(+), 99 deletions(-) create mode 100644 include/korka/utils/frozen_hash_string_view.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dcab2e..1d641d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ add_library(korka_lib include/korka/utils/overloaded.hpp include/korka/shared/types.hpp include/korka/shared/flat_map.hpp + include/korka/utils/frozen_hash_string_view.hpp ) target_include_directories(korka_lib diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index f7b106c..d6348b9 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -1,10 +1,13 @@ #pragma once +#include "frozen/bits/elsa.h" #include "korka/shared/error.hpp" #include "korka/shared/flat_map.hpp" #include "korka/utils/overloaded.hpp" +#include "korka/vm/op_codes.hpp" #include "parser.hpp" #include "korka/vm/bytecode_builder.hpp" +#include "korka/utils/frozen_hash_string_view.hpp" #include #include #include @@ -15,7 +18,7 @@ namespace korka { struct void_t { }; - using type_info = std::variant; + using 
vm::type_info; constexpr auto string_to_type(std::string_view name) -> type { if (name == "int") return type::i64; @@ -36,14 +39,14 @@ namespace korka { struct variable_info { std::string_view name; type_info type; - // bp + stack_offset - std::size_t stack_offset; + + std::size_t locals_index; static constexpr auto from_node(const nodes::decl_var &node) -> variable_info { return { .name = node.var_name, .type{}, - .stack_offset{} + .locals_index{} }; } }; @@ -56,10 +59,56 @@ namespace korka { vm::bytecode_builder::label label; }; + template + struct const_function_info { + std::string_view name; + std::size_t param_count; + std::array params; + type_info return_type; + + vm::bytecode_builder::label label; + }; + + template + struct _extract_function_signature { + template + struct param_helper; + + template + struct param_helper> { + // Map each parameter info to its corresponding C++ type + using type = vm::type_info_to_cpp_t<[]{return info_getter().return_type;}>( + vm::type_info_to_cpp_t<[]{return info_getter().params[Is].type;}>... 
+ ); + }; + + using type = typename param_helper< + std::make_index_sequence + >::type; + }; + + template + using const_function_info_to_signature_t = typename _extract_function_signature::type; + + template + constexpr auto function_info_to_const(const function_info &f) { + const_function_info info{ + .name = f.name, + .param_count = f.params.size(), + .params{}, + .return_type = f.return_type, + .label{} + }; + + std::ranges::copy(f.params, std::begin(info.params)); + return info; + } + struct symbol_table { struct scope { flat_map variables; - std::size_t current_stack_frame_size = 0; + + std::size_t current_locals_size{}; }; std::vector scopes; flat_map functions; @@ -69,25 +118,25 @@ namespace korka { constexpr auto pop_scope() -> void { scopes.pop_back(); } constexpr auto declare_var(std::string_view name, const type_info &type) -> std::expected { - if (scopes.empty()) + if (scopes.empty()) { return std::unexpected{error::other_compiler_error{ .message = "No scope" }}; + } auto ¤t = scopes.back(); - if (current.variables.contains(name)) + if (current.variables.contains(name)) { return std::unexpected{error::redeclaration{ .identifier = name }}; + } variable_info info{ .name = name, .type = type, - .stack_offset = current.current_stack_frame_size + .locals_index = current.current_locals_size++ }; - // For now, assume every variable (i64) is 8 bytes - current.current_stack_frame_size += 8; current.variables[name] = info; return info; } @@ -121,25 +170,99 @@ namespace korka { } }; - struct static_symbol_table { - template - static constexpr auto make_from_table() -> static_symbol_table { - constexpr static symbol_table table = symbol_table_getter(); + struct compilation_result { + std::vector bytes; + flat_map functions; + }; + + template + struct const_compilation_result { + std::array bytes; + frozen::unordered_map, NFunctions> functions; + + template + using get_signature_t = typename SignatureMapper::template get_signature_t; + + template + constexpr auto 
function() const -> get_signature_t * { + return nullptr; } + + SignatureMapper mapper{}; }; +template +struct unique_type{}; + + template + struct signature_mapper; + + template + struct signature_mapper> { + constexpr static auto _overloaded = overloaded{ + ([](unique_type{}(function_info_getter(Is).name, 0)>) + -> const_function_info_to_signature_t<[] {return function_info_getter(Is);}> * + { + return nullptr; + })... + }; + + template + using get_signature_t = std::remove_pointer_t{}(name, 0)>{}))>; + + std::tuple *...> debug1; + std::tuple{}(function_info_getter(Is).name, 0)>...> debug2; + }; + + + template + constexpr auto compilation_result_to_const() { + // --- BYTES --- + constexpr static auto bytes = to_array<[] { return r().bytes; }>(); + + // --- FUNCTIONS --- + constexpr static auto function_count = []() constexpr { + return r().functions.size(); + }(); + constexpr static auto max_params_n = []() constexpr { + std::size_t n{}; + for (auto &&f: r().functions) { + n = std::max(n, f.second.params.size()); + } + return n; + }(); + constexpr static auto functions = []() constexpr { + std::array>, function_count> functions_data{}; + std::size_t i{}; + for (auto &&[key, value]: r().functions) { + functions_data[i++] = (std::make_pair(key, function_info_to_const(value))); + } + return frozen::make_unordered_map(functions_data); + }; + + using sign_mapper = signature_mapper<[](std::size_t i) { return (functions().begin() + i)->second; }, std::make_index_sequence>; + + return const_compilation_result{ + bytes, + functions() + }; + } + class compiler { public: constexpr compiler(std::span nodes, nodes::index_t root_node) : m_nodes(nodes), m_root_node(root_node) {} - constexpr auto compile() -> std::expected, error_t> { + constexpr auto compile() -> std::expected { m_symbols.push_scope(); auto ok = process_node(m_root_node); if (!ok) return std::unexpected{ok.error()}; - return builder.build(); + return compilation_result{ + builder.build(), + m_symbols.functions 
+ }; } private: @@ -181,7 +304,7 @@ namespace korka { parameters.push_back({ .name = p_node.var_name, .type = string_to_type(p_node.type_name), - .stack_offset = 0 // Will be calculated inside the scope + .locals_index = 0 }); } @@ -202,7 +325,10 @@ namespace korka { // Handle parameters as local variables for (auto &&param: parameters) { - m_symbols.declare_var(param.name, param.type); + auto ok = m_symbols.declare_var(param.name, param.type); + if (not ok) { + return std::unexpected{ok.error()}; + } } // Function body @@ -253,17 +379,95 @@ namespace korka { builder.emit_op(vm::op_code::ret); return *actual_type; }, + [&](const nodes::decl_var &var) -> result_t { + auto ok = m_symbols.declare_var(var.var_name, string_to_type(var.type_name)); + if (!ok) { + return std::unexpected{ok.error()}; + } + + if (var.init_expr != nodes::empty_node) { + auto expr = process_node(var.init_expr); + if (not expr) { + return expr; + } + builder.emit_save_local(ok->locals_index); + } + return ok->type; + }, [&](const nodes::expr_var &var) -> result_t { auto info = m_symbols.lookup_variable(var.name); - if (!info) + if (!info) { return std::unexpected{error::undefined_symbol{ .identifier = var.name }}; + } - // TODO: emit_load_local(info->stack_offset) + builder.emit_load_local(info->locals_index); return info->type; }, + [&](const nodes::expr_binary &expr) -> result_t { + auto left = process_node(expr.left); + if (not left) { + return left; + } + auto right = process_node(expr.right); + if (not right) { + return right; + } + + if ((*left) != (*right)) { + return std::unexpected{error::other_compiler_error{ + "Expected same types in the binary expression" + }}; + } + + auto code = vm::get_op_code_for_math(*left, *right, expr.op); + if (not code) { + return std::unexpected{code.error()}; + } + builder.emit_op(*code); + + return *left; + }, + + [&](const nodes::stmt_if &if_) -> result_t { + auto condition_expr = process_node(if_.condition); + if (not condition_expr) { + return 
condition_expr; + } + + auto else_branch_label = builder.make_label(); + auto end_label = builder.make_label(); + + if (if_.else_branch == nodes::empty_node) { + builder.emit_jmp_if_zero(end_label); + + auto then_branch = process_node(if_.then_branch); + if (not then_branch) { + return then_branch; + } + } else { + builder.emit_jmp_if_zero(else_branch_label); + + auto then_branch = process_node(if_.then_branch); + if (not then_branch) { + return then_branch; + } + + builder.emit_jmp(end_label); + + builder.bind_label(else_branch_label); + auto else_branch = process_node(if_.else_branch); + if (not else_branch) { + return else_branch; + } + } + + builder.bind_label(end_label); + return {}; + }, + [&](const auto &value) -> result_t { std::ignore = value; return std::unexpected{error::other_compiler_error{ @@ -273,4 +477,24 @@ namespace korka { }, node.data); } }; + + template + constexpr static auto compile_nodes() { + constexpr static auto expected = [] constexpr { + return compiler{nodes, root}.compile(); + }; + + if constexpr (not expected()) { + report_error(); + } else { + return compilation_result_to_const<[] constexpr { return expected().value(); }>(); + } + } + + template + consteval static auto compile() { + constexpr static auto nodes_root = parse(); + + return compile_nodes(); + } } // namespace korka \ No newline at end of file diff --git a/include/korka/compiler/lexer.hpp b/include/korka/compiler/lexer.hpp index 8a06de3..d287158 100644 --- a/include/korka/compiler/lexer.hpp +++ b/include/korka/compiler/lexer.hpp @@ -271,18 +271,15 @@ namespace korka { template consteval auto lex() { - constexpr static auto expr = [] constexpr { + constexpr static auto expected = [] constexpr { return lexer{static_cast(str)}.lex(); }; - if constexpr (expr()) { - constexpr static auto expr_getter = [] constexpr { - return expr().value(); - }; - return to_array(); + if constexpr (expected()) { + return to_array<[]{return expected().value();}>(); } else { - report_error<[] { 
return expr().error(); }>(); - return expr().error(); + report_error(); + return expected().error(); } } } // korka diff --git a/include/korka/compiler/parser.hpp b/include/korka/compiler/parser.hpp index 2043325..68d8acd 100644 --- a/include/korka/compiler/parser.hpp +++ b/include/korka/compiler/parser.hpp @@ -48,23 +48,22 @@ namespace korka { index_t current; std::span nodes; - auto operator*() const -> index_t { return current; } - auto operator++() -> index_iterator& { + constexpr auto operator*() const -> index_t { return current; } + constexpr auto operator++() -> index_iterator& { current = nodes[current].next; return *this; } - auto operator++(int) -> index_iterator { + constexpr auto operator++(int) -> index_iterator { auto self = *this; current = nodes[current].next; return self; } - auto operator==(std::default_sentinel_t) const -> bool { + constexpr auto operator==(std::default_sentinel_t) const -> bool { return current == empty_node; } }; - auto get_list_view(std::span nodes, index_t head) { - + constexpr auto get_list_view(std::span nodes, index_t head) { return std::ranges::subrange( index_iterator{head, nodes}, std::default_sentinel @@ -525,25 +524,18 @@ namespace korka { } }; - template + template constexpr auto parse_tokens() { - constexpr static auto p = []constexpr{ + constexpr static auto expected = []constexpr{ return parser{std::span{tokens}}.parse(); }; - constexpr static auto pp = [] constexpr { - if constexpr (p()) { - return std::make_pair(to_array<[]{return p()->first;}>(), p()->second); - } else { - constexpr static auto get_error = [] constexpr { - return p().error(); - }; - report_error(); - return p().error(); - } - }; - - return pp(); + if constexpr(not expected()) { + report_error<[] {return expected().error();}>(); + return expected().error(); + } else { + return std::make_pair(to_array<[]{return expected()->first;}>(), expected()->second); + } } template diff --git a/include/korka/shared/error.hpp b/include/korka/shared/error.hpp 
index 355aba3..1fdf92d 100644 --- a/include/korka/shared/error.hpp +++ b/include/korka/shared/error.hpp @@ -84,6 +84,14 @@ namespace korka { constexpr auto report(const other_compiler_error &err) -> std::string { return korka::format("Compiler Error: ~", err.message); } + + struct other_error { + std::string_view message; + }; + + constexpr auto report(const other_error &err) -> std::string { + return korka::format("Error: ~", err.message); + } } using error_t = std::variant< @@ -93,7 +101,8 @@ namespace korka { error::redeclaration, error::undefined_symbol, error::unknown_type, - error::other_compiler_error>; + error::other_compiler_error, + error::other_error>; constexpr auto to_string(const error_t &err) -> std::string { return std::visit([](const auto &e) { diff --git a/include/korka/shared/flat_map.hpp b/include/korka/shared/flat_map.hpp index f994330..093f55f 100644 --- a/include/korka/shared/flat_map.hpp +++ b/include/korka/shared/flat_map.hpp @@ -1,4 +1,5 @@ #pragma once + #include #include @@ -19,7 +20,7 @@ namespace korka { sort_data(); } - constexpr auto find(const Key& key) const { + constexpr auto find(const Key &key) const { auto it = lower_bound(key); if (it != m_data.end() && it->first == key) { return it; @@ -27,15 +28,17 @@ namespace korka { return m_data.end(); } - constexpr auto operator[](const Key &key) const -> decltype(auto) { - auto it = find(key); - if (it == m_data.end()) { - it = m_data. 
+ constexpr auto operator[](const Key &key) -> Value & { + auto it = lower_bound(key); + + if (it == m_data.end() || it->first != key) { + it = m_data.insert(it, {key, Value{}}); } - return *it; + + return it->second; } - constexpr auto contains(const Key& key) const -> bool { + constexpr auto contains(const Key &key) const -> bool { return find(key) != m_data.end(); } @@ -60,28 +63,34 @@ namespace korka { } constexpr auto begin() const { return m_data.begin(); } + constexpr auto end() const { return m_data.end(); } + constexpr auto size() const -> std::size_t { return m_data.size(); } + constexpr auto clear() { + m_data.clear(); + } + private: std::vector m_data; - constexpr auto lower_bound(const Key& key) const { + constexpr auto lower_bound(const Key &key) const { return std::lower_bound(m_data.begin(), m_data.end(), key, - [](const value_type& pair, const Key& k) { + [](const value_type &pair, const Key &k) { return pair.first < k; }); } - constexpr auto lower_bound(const Key& key) { + constexpr auto lower_bound(const Key &key) { return std::lower_bound(m_data.begin(), m_data.end(), key, - [](const value_type& pair, const Key& k) { + [](const value_type &pair, const Key &k) { return pair.first < k; }); } constexpr auto sort_data() -> void { - std::sort(m_data.begin(), m_data.end(), [](const auto& a, const auto& b) { + std::sort(m_data.begin(), m_data.end(), [](const auto &a, const auto &b) { return a.first < b.first; }); } diff --git a/include/korka/utils/frozen_hash_string_view.hpp b/include/korka/utils/frozen_hash_string_view.hpp new file mode 100644 index 0000000..d8d6fd9 --- /dev/null +++ b/include/korka/utils/frozen_hash_string_view.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace frozen { + template<> + struct elsa { + constexpr std::size_t operator()(std::string_view const &value, std::size_t seed) const { + auto hash = elsa{}; + + for (auto &&c : value) { + auto h = hash(c, seed); + seed ^= h; + seed += h; + } + + return seed; + } 
+ }; +} diff --git a/include/korka/utils/string.hpp b/include/korka/utils/string.hpp index 9ceb03e..efae322 100644 --- a/include/korka/utils/string.hpp +++ b/include/korka/utils/string.hpp @@ -9,6 +9,8 @@ namespace korka { struct const_string { constexpr const_string() = default; + constexpr const_string(const const_string &) = default; + constexpr const_string(const char (&str)[N]) { std::copy_n(str, N, value); } @@ -19,8 +21,16 @@ namespace korka { char value[N]{}; const std::size_t length = N; + }; + template + constexpr auto const_string_from_string_view() { + const_string str; + std::copy_n(sv_getter().data(), str.length, str.value); + return str; + } + template concept StringLiteral = requires(std::decay_t t) { { t.value } -> std::convertible_to; diff --git a/include/korka/vm/bytecode_builder.hpp b/include/korka/vm/bytecode_builder.hpp index 6850906..030a7b4 100644 --- a/include/korka/vm/bytecode_builder.hpp +++ b/include/korka/vm/bytecode_builder.hpp @@ -43,6 +43,16 @@ namespace korka::vm { return m_last_op_pos = m_data.write(static_cast(code)); } + constexpr auto emit_load_local(local_index_t index) { + emit_op(op_code::lload); + m_data.write_many(index); + } + + constexpr auto emit_save_local(local_index_t index) { + emit_op(op_code::lsave); + m_data.write_many(index); + } + template constexpr auto emit_const(const type_to_cpp_t &value) { emit_op(get_const_op_by_type()); @@ -50,9 +60,12 @@ namespace korka::vm { } // --- JUMPS --- -// constexpr auto emit_jmp(const label &target) { -// record_jump(op_code::jmp, target); -// } + constexpr auto emit_jmp(const label &target) { + record_jump(op_code::jmp, target); + } + constexpr auto emit_jmp_if_zero(const label &target) { + record_jump(op_code::jmpz, target); + } // // constexpr auto emit_jmp_if(const label &target, reg_id_t cond) { // record_jump(op_code::jmp_if, target, cond); @@ -66,9 +79,10 @@ namespace korka::vm { std::abort(); } int target_pc = *label_pos; - std::int64_t offset = target_pc - 
j.instr_index; + jump_offset offset = target_pc - j.instr_index; + std::ranges::copy( - std::as_bytes(std::span{&offset, 1}), + std::bit_cast>(offset), std::begin(data) + j.instr_index + op_code_size); } @@ -77,7 +91,7 @@ namespace korka::vm { private: struct pending_jump { - int instr_index; + jump_offset instr_index; label target; }; @@ -90,11 +104,9 @@ namespace korka::vm { std::vector> m_label_pos; constexpr auto - record_jump(op_code op, const label &label_, std::optional condition = std::nullopt) -> void { + record_jump(op_code op, const label &label_) -> void { auto index = emit_op(op); - m_data.write_many(std::int64_t{}); - if (condition) - m_data.write_many(*condition); + m_data.write_many(jump_offset{}); m_jumps.emplace_back(index, label_); } }; diff --git a/include/korka/vm/op_codes.hpp b/include/korka/vm/op_codes.hpp index e0b5337..fabd438 100644 --- a/include/korka/vm/op_codes.hpp +++ b/include/korka/vm/op_codes.hpp @@ -4,10 +4,41 @@ #include "korka/shared/types.hpp" namespace korka::vm { + using local_index_t = std::uint8_t; + using jump_offset = std::int32_t; + enum class op_code { + // --- Memory & Stack --- + // Loads a value from tje local at index on stack + // + lload, + + // Pops a value from stack and saves to the local at index + // + lsave, + // Pushes a value onto the stack - i64_const, // + i64_const, // + + // --- Math --- + // Order: + // A = pop() # first on stack + // B = pop() # second on stack + // C = B / A + // push(C) + + i64_add, + i64_sub, + i64_mul, + i64_div, + // --- Control flow --- + // - Jumps - + // // + jmp, // jumps no matter what + jmpz, // pops value and jumps if it's zero + + // - Other - ret }; @@ -20,5 +51,61 @@ namespace korka::vm { } } + using type_info = std::variant; + + constexpr auto + get_op_code_for_math(type_info ltype, type_info rtype, std::string_view op) -> std::expected { + if (ltype != rtype) { + return std::unexpected{error::other_error{ + .message = "Math operations between distinct types are not 
supported yet" + }}; + } + + return std::visit(overloaded{ + [&](korka::type type) -> std::expected { + if (type == korka::type::i64) { + if (op == "+") + return op_code::i64_add; + if (op == "-") + return op_code::i64_sub; + if (op == "*") + return op_code::i64_mul; + if (op == "/") + return op_code::i64_div; + return std::unexpected{error::other_error{ + .message = "Unsupported math operation for i64" + }}; + } + return std::unexpected{error::other_error{ + .message = "Unsupported type for math" + }}; + }, + [&](auto &&) -> std::expected { + + return std::unexpected{error::other_error{ + .message = "Unsupported type for math" + }}; + } + }, ltype); + } + constexpr int op_code_size = 1; + + template + constexpr auto _type_info_to_cpp() { + if constexpr (std::holds_alternative(getter())) { + constexpr static korka::type t = [] { return std::get(getter()); }(); + using type = type_to_cpp_t; + if constexpr (std::is_void_v) { + return; + } else { + return std::decay_t{}; + } + } else { + static_assert(false, "Unsupported type"); + } + } + + template + using type_info_to_cpp_t = decltype(_type_info_to_cpp()); } \ No newline at end of file diff --git a/main.cpp b/main.cpp index df0e9dd..150eca8 100644 --- a/main.cpp +++ b/main.cpp @@ -5,38 +5,51 @@ constexpr char code[] = R"( int main() { - return 5; + int a = 2; + if (a) { + return a; + } else { + return 5 + a; + } +} + +int foo(int a, int b) { + return a + b; } )"; -//constexpr static auto tokens = korka::lex(); +constexpr auto compile_result = korka::compile(); -//constexpr auto ast = korka::parse(); -// -//constexpr auto node_pool = ast.first; -//constexpr auto node_root = ast.second; +auto main_func = compile_result.function<"main">(); +static_assert(std::is_same_v); -int main() { - auto lexed = korka::lexer{code}.lex(); - if (not lexed) { - std::println("{}", korka::to_string(lexed.error())); - return 0; - } +auto foo_func = compile_result.function<"foo">(); +static_assert(std::is_same_v); - auto parsed = 
korka::parser{lexed.value()}.parse(); - if (not parsed) { - std::println("{}", korka::to_string(parsed.error())); - return 0; - } - auto [node_pool, node_root] = parsed.value(); - std::println("{}", korka::ast_walker{node_pool, node_root, 0}); +int main() { + std::println("{::X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); - korka::compiler compiler{node_pool, node_root}; - auto bytes = compiler.compile(); +// auto lexed = korka::lexer{code}.lex(); +// if (not lexed) { +// std::println("{}", korka::to_string(lexed.error())); +// return 0; +// } +// +// auto parsed = korka::parser{lexed.value()}.parse(); +// if (not parsed) { +// std::println("{}", korka::to_string(parsed.error())); +// return 0; +// } +// auto [node_pool, node_root] = parsed.value(); +// std::println("{}", korka::ast_walker{node_pool, node_root, 0}); +// +// korka::compiler compiler{node_pool, node_root}; +// auto bytes = compiler.compile(); +// +// if (bytes) { +// std::println("{::X}", *bytes | std::views::transform([](auto b) { return static_cast(b); })); +// } else { +// std::println("{}", korka::to_string(bytes.error())); +// } - if (bytes) { - std::println("{}", *bytes | std::views::transform([](auto b) { return static_cast(b); })); - } else { - std::println("{}", korka::to_string(bytes.error())); - } } \ No newline at end of file From 699c1fc5aa39157eda1cfae6352e4e20fd2d4043 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Sat, 7 Mar 2026 21:31:36 +0300 Subject: [PATCH 3/6] Minor fixes --- include/korka/compiler/compiler.hpp | 3 +-- main.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index d6348b9..516119b 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -12,7 +12,6 @@ #include #include #include -#include namespace korka { struct void_t { @@ -485,7 +484,7 @@ struct unique_type{}; }; if constexpr (not 
expected()) { - report_error(); + report_error<[]{return expected.error();}>(); } else { return compilation_result_to_const<[] constexpr { return expected().value(); }>(); } diff --git a/main.cpp b/main.cpp index 150eca8..b7d2082 100644 --- a/main.cpp +++ b/main.cpp @@ -27,7 +27,7 @@ auto foo_func = compile_result.function<"foo">(); static_assert(std::is_same_v); int main() { - std::println("{::X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); + std::println("{:n:02X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); // auto lexed = korka::lexer{code}.lex(); // if (not lexed) { From 6f046cb1d71760823468b7b884240c5e7d304e25 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Sun, 8 Mar 2026 19:32:13 +0300 Subject: [PATCH 4/6] Minor fixes --- include/korka/compiler/compiler.hpp | 2 +- include/korka/compiler/lexer.hpp | 2 +- include/korka/compiler/parser.hpp | 4 ++-- include/korka/vm/op_codes.hpp | 8 ++++++-- include/korka/vm/vm_runtime.hpp | 2 -- main.cpp | 3 ++- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index 516119b..031e85d 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -478,7 +478,7 @@ struct unique_type{}; }; template - constexpr static auto compile_nodes() { + consteval static auto compile_nodes() { constexpr static auto expected = [] constexpr { return compiler{nodes, root}.compile(); }; diff --git a/include/korka/compiler/lexer.hpp b/include/korka/compiler/lexer.hpp index d287158..d5dbb5b 100644 --- a/include/korka/compiler/lexer.hpp +++ b/include/korka/compiler/lexer.hpp @@ -271,7 +271,7 @@ namespace korka { template consteval auto lex() { - constexpr static auto expected = [] constexpr { + constexpr static auto expected = [] consteval { return lexer{static_cast(str)}.lex(); }; diff --git a/include/korka/compiler/parser.hpp 
b/include/korka/compiler/parser.hpp index 68d8acd..e653edd 100644 --- a/include/korka/compiler/parser.hpp +++ b/include/korka/compiler/parser.hpp @@ -525,7 +525,7 @@ namespace korka { }; template - constexpr auto parse_tokens() { + consteval auto parse_tokens() { constexpr static auto expected = []constexpr{ return parser{std::span{tokens}}.parse(); }; @@ -539,7 +539,7 @@ namespace korka { } template - constexpr auto parse() { + consteval auto parse() { constexpr static auto tokens = lex(); return parse_tokens(); } diff --git a/include/korka/vm/op_codes.hpp b/include/korka/vm/op_codes.hpp index fabd438..5e27851 100644 --- a/include/korka/vm/op_codes.hpp +++ b/include/korka/vm/op_codes.hpp @@ -10,11 +10,15 @@ namespace korka::vm { enum class op_code { // --- Memory & Stack --- // Loads a value from tje local at index on stack - // + // lload, + // Load parameters from stack into locals + // + pload, + // Pops a value from stack and saves to the local at index - // + // lsave, // Pushes a value onto the stack diff --git a/include/korka/vm/vm_runtime.hpp b/include/korka/vm/vm_runtime.hpp index 53e53b7..0a53c84 100644 --- a/include/korka/vm/vm_runtime.hpp +++ b/include/korka/vm/vm_runtime.hpp @@ -6,8 +6,6 @@ namespace korka { - class vm_runtime { - }; } // korka \ No newline at end of file diff --git a/main.cpp b/main.cpp index b7d2082..4aeaf21 100644 --- a/main.cpp +++ b/main.cpp @@ -27,7 +27,8 @@ auto foo_func = compile_result.function<"foo">(); static_assert(std::is_same_v); int main() { - std::println("{:n:02X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); +// std::ignore = tokens; +// std::println("{:n:02X}", compile_result.bytes | std::views::transform([](auto b) { return static_cast(b); })); // auto lexed = korka::lexer{code}.lex(); // if (not lexed) { From cb0fafcfc142f3b2a3414316988f30dc3802efe6 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Sun, 8 Mar 2026 19:40:32 +0300 Subject: [PATCH 5/6] Fix GCC --- 
.github/workflows/ubuntu-build.yml | 10 +++++----- include/korka/compiler/compiler.hpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ubuntu-build.yml b/.github/workflows/ubuntu-build.yml index 4737f70..213f41e 100644 --- a/.github/workflows/ubuntu-build.yml +++ b/.github/workflows/ubuntu-build.yml @@ -16,8 +16,8 @@ jobs: build_type: [Release, Debug] include: - compiler: gcc - cc: gcc-14 - cxx: g++-14 + cc: gcc-15 + cxx: g++-15 - compiler: clang cc: clang-21 cxx: clang++-21 @@ -25,11 +25,11 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install GCC 14 + - name: Install GCC 15 if: matrix.compiler == 'gcc' run: | sudo apt-get update - sudo apt-get install -y g++-14 + sudo apt-get install -y ${{ matrix.cc }} - name: Install Clang 21 if: matrix.compiler == 'clang' @@ -39,7 +39,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 21 - sudo apt-get install -y clang-21 + sudo apt-get install -y ${{ matrix.cc }} - name: Configure & Build run: | diff --git a/include/korka/compiler/compiler.hpp b/include/korka/compiler/compiler.hpp index 031e85d..e887d6e 100644 --- a/include/korka/compiler/compiler.hpp +++ b/include/korka/compiler/compiler.hpp @@ -148,8 +148,8 @@ namespace korka { } constexpr auto lookup_variable(std::string_view name) -> std::optional { - for (auto &scope: std::ranges::reverse_view(scopes)) { - if (auto var_it = scope.variables.find(name); var_it != std::end(scope.variables)) { + for (auto &scp: std::ranges::reverse_view(scopes)) { + if (auto var_it = scp.variables.find(name); var_it != std::end(scp.variables)) { return var_it->second; } } From e026466d7766f6fc054e4be039e214d56fdd2e59 Mon Sep 17 00:00:00 2001 From: PyXiion Date: Sun, 8 Mar 2026 19:42:06 +0300 Subject: [PATCH 6/6] GCC 15 --- .github/workflows/ubuntu-build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu-build.yml b/.github/workflows/ubuntu-build.yml index 213f41e..8c43b54 
100644 --- a/.github/workflows/ubuntu-build.yml +++ b/.github/workflows/ubuntu-build.yml @@ -28,8 +28,9 @@ jobs: - name: Install GCC 15 if: matrix.compiler == 'gcc' run: | + sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt-get update - sudo apt-get install -y ${{ matrix.cc }} + sudo apt-get install -y ${{ matrix.cc }} ${{ matrix.cxx }} - name: Install Clang 21 if: matrix.compiler == 'clang'