diff --git a/.pupignore b/.pupignore index 6353280..83aaf8a 100644 --- a/.pupignore +++ b/.pupignore @@ -7,3 +7,4 @@ test/e2e/fixtures/ # Example projects - require external dependencies examples/ +.claude/ diff --git a/COMPATIBILITY.md b/COMPATIBILITY.md index b5b0cad..1a61cd4 100644 --- a/COMPATIBILITY.md +++ b/COMPATIBILITY.md @@ -205,13 +205,7 @@ Features in putup that extend beyond tup: **Affected**: MSVC 19.38+ (Visual Studio 2022 17.8) with `/Zc:nrvo /std:c++20 /O2` -`Graph::make_graph()` returns a `Graph` by value. The `Graph` owns a `StringPool` and an `unordered_map` whose hash/equal functors hold a `StringPool*` pointing into the same object. Under NRVO the returned object is constructed directly in the caller's storage, so the pointers remain valid. - -MSVC does not apply NRVO here. It constructs the `Graph` locally, then move-constructs into the caller. After the move, `StringPool` lives at a new address but `DirNameKeyHash`/`DirNameKeyEqual` still hold the old pointer — any subsequent map lookup dereferences freed memory. - -The MS docs list only two NRVO exclusions (multiple return variables, `throw` in scope), neither of which applies. This appears to be an undocumented limitation triggered when a function re-assigns an `unordered_map` member whose hash/equal functors contain pointers to the local object's own fields. - -**Status**: MSVC CI jobs are disabled. MinGW (GCC on Windows) is unaffected and remains in CI. +**Status**: Fixed. The root cause was `DirNameKeyHash`/`DirNameKeyEqual` functors holding raw `StringPool*` pointers into the same `Graph` object. When MSVC failed to apply NRVO on `make_graph()`, the move left dangling pointers. The `DirNameKey` hash map has been replaced with `std::vector dir_children` (per-directory sorted arrays), eliminating the pointer coupling entirely. MSVC CI jobs remain disabled for other reasons (MinGW/GCC is the Windows CI target). 
## Reporting Issues diff --git a/DESIGN.md b/DESIGN.md index 4acd042..d1df79b 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -458,7 +458,7 @@ struct CommandNode { StringId instruction_id = StringId::Empty; // Instruction pattern (e.g., "gcc -c %f -o %o") std::vector inputs = {}; // Operand file NodeIds for %f expansion std::vector outputs = {}; // Operand file NodeIds for %o expansion - std::set exported_vars = {}; // Env vars to export (interned) + SortedIdVec exported_vars = {}; // Env vars to export (interned StringIds) std::optional generated_output = {}; // Output specification OutputAction output_action = {}; // What to do with output NodeId parent_command = INVALID_NODE_ID; // Parent command for InjectImplicitDeps @@ -546,9 +546,10 @@ struct Graph { std::unordered_map> order_only_to_index; std::unordered_map> order_only_dependents; - // Node lookup indices (with transparent lookup support) - std::unordered_map dir_name_index; // (parent, name) -> NodeId - std::unordered_map command_str_index; // command -> NodeId + // Node lookup indices + std::vector dir_children; // Per-directory name -> NodeId (indexed by parent dir) + StringPool command_strings; // Interned expanded command strings + SortedPairVec command_index; // StringId(command) -> NodeId // ID generators (next available ID for each type) NodeId next_file_id = 2; // Starts at 2 (BUILD_ROOT is 1) @@ -558,7 +559,10 @@ struct Graph { }; // Path cache is stored externally in BuildGraph for const-correctness -using PathCache = std::unordered_map; +struct PathCache { + NodeIdMap32 ids; // NodeId -> StringId (path interned in pool) + StringPool pool; // Owns the full path strings +}; class BuildGraph { Graph graph_; @@ -999,7 +1003,7 @@ struct BuildJob { std::vector inputs = {}; std::vector outputs = {}; std::vector order_only_inputs = {}; // Order-only dependencies - std::set exported_vars = {}; // Env vars to export to command + std::vector exported_vars = {}; // Env vars to export to command // For 
auto-generated rules (from pattern matching) bool capture_stdout = false; // Capture stdout for depfile parsing diff --git a/Tupfile b/Tupfile index aea5db4..e47a9ae 100644 --- a/Tupfile +++ b/Tupfile @@ -4,10 +4,15 @@ include_rules c-srcs-y = third_party/sha256/sha256.c # Core -srcs-y = src/core/hash.cpp +srcs-y = src/core/arena.cpp +srcs-y += src/core/hash.cpp +srcs-y += src/core/id_array.cpp +srcs-y += src/core/id_bitset.cpp srcs-y += src/core/layout.cpp srcs-y += src/core/metrics.cpp +srcs-y += src/core/path.cpp srcs-y += src/core/path_utils.cpp +srcs-y += src/core/sorted_id_vec.cpp srcs-y += src/core/string_pool.cpp srcs-y += src/core/string_utils.cpp srcs-y += src/core/terminal.cpp diff --git a/bootstrap-linux.sh b/bootstrap-linux.sh index b74a8b2..50c4ab9 100755 --- a/bootstrap-linux.sh +++ b/bootstrap-linux.sh @@ -47,28 +47,40 @@ mkdir -p "build/test/unit" (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/terminal.cpp -o build/terminal.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_utils.cpp -o build/string_utils.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_pool.cpp -o build/string_pool.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/sorted_id_vec.cpp -o build/sorted_id_vec.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path_utils.cpp -o build/path_utils.o) +(cd "." 
&& g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path.cpp -o build/path.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/metrics.cpp -o build/metrics.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/layout.cpp -o build/layout.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_bitset.cpp -o build/id_bitset.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_array.cpp -o build/id_array.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/hash.cpp -o build/hash.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/arena.cpp -o build/arena.o) (cd "." && gcc -std=c11 -Wall -Wextra -Werror -fPIC -I./third_party -O2 -ffunction-sections -fdata-sections -Wno-error -c third_party/sha256/sha256.c -o build/sha256.o) -(cd "." 
&& g++ build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o build/main.o -o build/putup -Wl,--gc-sections ) -(cd "." && ar rcs build/libputup.a build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o) +(cd "." 
&& g++ build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o build/main.o -o build/putup -Wl,--gc-sections ) +(cd "." && ar rcs build/libputup.a build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_var_tracking.cpp -o 
../../build/test/unit/test_var_tracking.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_types.cpp -o ../../build/test/unit/test_types.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_target.cpp -o ../../build/test/unit/test_target.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_utils.cpp -o ../../build/test/unit/test_string_utils.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_pool.cpp -o ../../build/test/unit/test_string_pool.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_sorted_id_vec.cpp -o ../../build/test/unit/test_sorted_id_vec.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_rule_pattern.cpp -o ../../build/test/unit/test_rule_pattern.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_platform_process.cpp -o ../../build/test/unit/test_platform_process.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c 
test_platform_file_io.cpp -o ../../build/test/unit/test_platform_file_io.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path_utils.cpp -o ../../build/test/unit/test_path_utils.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path.cpp -o ../../build/test/unit/test_path.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_parser.cpp -o ../../build/test/unit/test_parser.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_node_id_map.cpp -o ../../build/test/unit/test_node_id_map.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_main.cpp -o ../../build/test/unit/test_main.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_lexer.cpp -o ../../build/test/unit/test_lexer.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_layout.cpp -o ../../build/test/unit/test_layout.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_index.cpp -o 
../../build/test/unit/test_index.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_ignore.cpp -o ../../build/test/unit/test_ignore.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_bitset.cpp -o ../../build/test/unit/test_id_bitset.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_array.cpp -o ../../build/test/unit/test_id_array.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_hash.cpp -o ../../build/test/unit/test_hash.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_arena.cpp -o ../../build/test/unit/test_arena.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_graph.cpp -o ../../build/test/unit/test_graph.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_glob.cpp -o ../../build/test/unit/test_glob.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_exec.cpp -o ../../build/test/unit/test_exec.o) @@ -80,4 +92,4 @@ 
mkdir -p "build/test/unit" (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_bench.cpp -o ../../build/test/unit/test_bench.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c e2e_fixture.cpp -o ../../build/test/unit/e2e_fixture.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -Wno-error -c ../../third_party/catch_amalgamated.cpp -o ../../build/test/unit/catch_amalgamated.o) -(cd "test/unit" && g++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o ../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o ../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test -Wl,--gc-sections ) +(cd "test/unit" && g++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o 
../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o ../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_arena.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_id_array.o ../../build/test/unit/test_id_bitset.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_node_id_map.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_sorted_id_vec.o ../../build/test/unit/test_string_pool.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test -Wl,--gc-sections ) diff --git a/bootstrap-macos.sh b/bootstrap-macos.sh index 4957a72..3c74077 100755 --- a/bootstrap-macos.sh +++ b/bootstrap-macos.sh @@ -47,28 +47,40 @@ mkdir -p "build/test/unit" (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/terminal.cpp -o build/terminal.o) (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_utils.cpp -o build/string_utils.o) (cd "." 
&& clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_pool.cpp -o build/string_pool.o) +(cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/sorted_id_vec.cpp -o build/sorted_id_vec.o) (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path_utils.cpp -o build/path_utils.o) +(cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path.cpp -o build/path.o) (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/metrics.cpp -o build/metrics.o) (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/layout.cpp -o build/layout.o) +(cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_bitset.cpp -o build/id_bitset.o) +(cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_array.cpp -o build/id_array.o) (cd "." && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/hash.cpp -o build/hash.o) +(cd "." 
&& clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/arena.cpp -o build/arena.o) (cd "." && clang -std=c11 -Wall -Wextra -Werror -fPIC -I./third_party -O2 -ffunction-sections -fdata-sections -Wno-error -c third_party/sha256/sha256.c -o build/sha256.o) -(cd "." && clang++ build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o build/main.o -o build/putup -Wl,-dead_strip ) -(cd "." && ar rcs build/libputup.a build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o) +(cd "." 
&& clang++ build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o build/main.o -o build/putup -Wl,-dead_strip ) +(cd "." && ar rcs build/libputup.a build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-posix.o build/file_io-posix.o build/env-posix.o build/path-posix.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_var_tracking.cpp -o 
../../build/test/unit/test_var_tracking.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_types.cpp -o ../../build/test/unit/test_types.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_target.cpp -o ../../build/test/unit/test_target.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_utils.cpp -o ../../build/test/unit/test_string_utils.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_pool.cpp -o ../../build/test/unit/test_string_pool.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_sorted_id_vec.cpp -o ../../build/test/unit/test_sorted_id_vec.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_rule_pattern.cpp -o ../../build/test/unit/test_rule_pattern.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_platform_process.cpp -o ../../build/test/unit/test_platform_process.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG 
-ffunction-sections -fdata-sections -c test_platform_file_io.cpp -o ../../build/test/unit/test_platform_file_io.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path_utils.cpp -o ../../build/test/unit/test_path_utils.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path.cpp -o ../../build/test/unit/test_path.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_parser.cpp -o ../../build/test/unit/test_parser.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_node_id_map.cpp -o ../../build/test/unit/test_node_id_map.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_main.cpp -o ../../build/test/unit/test_main.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_lexer.cpp -o ../../build/test/unit/test_lexer.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_layout.cpp -o ../../build/test/unit/test_layout.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 
-DNDEBUG -ffunction-sections -fdata-sections -c test_index.cpp -o ../../build/test/unit/test_index.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_ignore.cpp -o ../../build/test/unit/test_ignore.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_bitset.cpp -o ../../build/test/unit/test_id_bitset.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_array.cpp -o ../../build/test/unit/test_id_array.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_hash.cpp -o ../../build/test/unit/test_hash.o) +(cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_arena.cpp -o ../../build/test/unit/test_arena.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_graph.cpp -o ../../build/test/unit/test_graph.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_glob.cpp -o ../../build/test/unit/test_glob.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG 
-ffunction-sections -fdata-sections -c test_exec.cpp -o ../../build/test/unit/test_exec.o) @@ -80,4 +92,4 @@ mkdir -p "build/test/unit" (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_bench.cpp -o ../../build/test/unit/test_bench.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c e2e_fixture.cpp -o ../../build/test/unit/e2e_fixture.o) (cd "test/unit" && clang++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -Wno-error -c ../../third_party/catch_amalgamated.cpp -o ../../build/test/unit/catch_amalgamated.o) -(cd "test/unit" && clang++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o ../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o ../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test -Wl,-dead_strip ) +(cd 
"test/unit" && clang++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o ../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o ../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_arena.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_id_array.o ../../build/test/unit/test_id_bitset.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_node_id_map.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_sorted_id_vec.o ../../build/test/unit/test_string_pool.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test -Wl,-dead_strip ) diff --git a/bootstrap-mingw.sh b/bootstrap-mingw.sh index fa1f1ea..8bb547f 100755 --- a/bootstrap-mingw.sh +++ b/bootstrap-mingw.sh @@ -47,28 +47,40 @@ mkdir -p "build/test/unit" (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/terminal.cpp -o build/terminal.o) (cd "." 
&& g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_utils.cpp -o build/string_utils.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/string_pool.cpp -o build/string_pool.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/sorted_id_vec.cpp -o build/sorted_id_vec.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path_utils.cpp -o build/path_utils.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/path.cpp -o build/path.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/metrics.cpp -o build/metrics.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/layout.cpp -o build/layout.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_bitset.cpp -o build/id_bitset.o) +(cd "." 
&& g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/id_array.cpp -o build/id_array.o) (cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/hash.cpp -o build/hash.o) +(cd "." && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I./include -I./third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c src/core/arena.cpp -o build/arena.o) (cd "." && gcc -std=c11 -Wall -Wextra -Werror -fPIC -I./third_party -O2 -ffunction-sections -fdata-sections -Wno-error -c third_party/sha256/sha256.c -o build/sha256.o) -(cd "." && g++ build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-win32.o build/file_io-win32.o build/env-win32.o build/path-win32.o build/main.o -o build/putup.exe -Wl,--gc-sections -static) -(cd "." 
&& ar rcs build/libputup.a build/sha256.o build/hash.o build/layout.o build/metrics.o build/path_utils.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-win32.o build/file_io-win32.o build/env-win32.o build/path-win32.o) +(cd "." && g++ build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-win32.o build/file_io-win32.o build/env-win32.o build/path-win32.o build/main.o -o build/putup.exe -Wl,--gc-sections -static ) +(cd "." 
&& ar rcs build/libputup.a build/sha256.o build/arena.o build/hash.o build/id_array.o build/id_bitset.o build/layout.o build/metrics.o build/path.o build/path_utils.o build/sorted_id_vec.o build/string_pool.o build/string_utils.o build/terminal.o build/ast.o build/config.o build/depfile.o build/eval.o build/glob.o build/ignore.o build/lexer.o build/parser.o build/var_tracking.o build/builder.o build/dag.o build/dep_scanner.o build/rule_pattern.o build/gcc.o build/topo.o build/entry.o build/reader.o build/writer.o build/progress_display.o build/runner.o build/scheduler.o build/cmd_build.o build/cmd_clean.o build/cmd_configure.o build/cmd_show.o build/cmd_parse.o build/config_commands.o build/context.o build/multi_variant.o build/options.o build/output.o build/target.o build/process-win32.o build/file_io-win32.o build/env-win32.o build/path-win32.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_var_tracking.cpp -o ../../build/test/unit/test_var_tracking.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_types.cpp -o ../../build/test/unit/test_types.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_target.cpp -o ../../build/test/unit/test_target.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_utils.cpp -o ../../build/test/unit/test_string_utils.o) +(cd "test/unit" && 
g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_string_pool.cpp -o ../../build/test/unit/test_string_pool.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_sorted_id_vec.cpp -o ../../build/test/unit/test_sorted_id_vec.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_rule_pattern.cpp -o ../../build/test/unit/test_rule_pattern.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_platform_process.cpp -o ../../build/test/unit/test_platform_process.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_platform_file_io.cpp -o ../../build/test/unit/test_platform_file_io.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path_utils.cpp -o ../../build/test/unit/test_path_utils.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_path.cpp -o ../../build/test/unit/test_path.o) (cd "test/unit" && 
g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_parser.cpp -o ../../build/test/unit/test_parser.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_node_id_map.cpp -o ../../build/test/unit/test_node_id_map.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_main.cpp -o ../../build/test/unit/test_main.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_lexer.cpp -o ../../build/test/unit/test_lexer.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_layout.cpp -o ../../build/test/unit/test_layout.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_index.cpp -o ../../build/test/unit/test_index.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_ignore.cpp -o ../../build/test/unit/test_ignore.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions 
-fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_bitset.cpp -o ../../build/test/unit/test_id_bitset.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_id_array.cpp -o ../../build/test/unit/test_id_array.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_hash.cpp -o ../../build/test/unit/test_hash.o) +(cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_arena.cpp -o ../../build/test/unit/test_arena.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_graph.cpp -o ../../build/test/unit/test_graph.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_glob.cpp -o ../../build/test/unit/test_glob.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_exec.cpp -o ../../build/test/unit/test_exec.o) @@ -80,4 +92,4 @@ mkdir -p "build/test/unit" (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object 
-I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c test_bench.cpp -o ../../build/test/unit/test_bench.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -c e2e_fixture.cpp -o ../../build/test/unit/e2e_fixture.o) (cd "test/unit" && g++ -std=c++20 -Wall -Wextra -Werror -Wpedantic -fPIC -fno-exceptions -fno-rtti -Wno-error=free-nonheap-object -I../../include -I../../third_party -O2 -DNDEBUG -ffunction-sections -fdata-sections -Wno-error -c ../../third_party/catch_amalgamated.cpp -o ../../build/test/unit/catch_amalgamated.o) -(cd "test/unit" && g++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o ../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o ../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test.exe -Wl,--gc-sections -static) +(cd "test/unit" && g++ ../../build/test/unit/test_bench.o ../../build/test/unit/test_builder.o ../../build/test/unit/test_dep_scanner.o ../../build/test/unit/test_depfile.o 
../../build/test/unit/test_e2e.o ../../build/test/unit/test_eval.o ../../build/test/unit/test_exec.o ../../build/test/unit/test_glob.o ../../build/test/unit/test_graph.o ../../build/test/unit/test_arena.o ../../build/test/unit/test_hash.o ../../build/test/unit/test_id_array.o ../../build/test/unit/test_id_bitset.o ../../build/test/unit/test_ignore.o ../../build/test/unit/test_index.o ../../build/test/unit/test_layout.o ../../build/test/unit/test_lexer.o ../../build/test/unit/test_main.o ../../build/test/unit/test_node_id_map.o ../../build/test/unit/test_parser.o ../../build/test/unit/test_path.o ../../build/test/unit/test_path_utils.o ../../build/test/unit/test_platform_file_io.o ../../build/test/unit/test_platform_process.o ../../build/test/unit/test_rule_pattern.o ../../build/test/unit/test_sorted_id_vec.o ../../build/test/unit/test_string_pool.o ../../build/test/unit/test_string_utils.o ../../build/test/unit/test_target.o ../../build/test/unit/test_types.o ../../build/test/unit/test_var_tracking.o ../../build/test/unit/catch_amalgamated.o ../../build/test/unit/e2e_fixture.o ../../build/libputup.a -o ../../build/test/unit/putup_test.exe -Wl,--gc-sections -static ) diff --git a/docs/plans/custom-containers-plan.md b/docs/plans/custom-containers-plan.md new file mode 100644 index 0000000..e71f4db --- /dev/null +++ b/docs/plans/custom-containers-plan.md @@ -0,0 +1,1104 @@ +# Custom Containers Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace `std::unordered_map`, `std::unordered_set`, and `std::set` with tailored integer-keyed primitives backed by pervasive string interning. + +**Architecture:** All strings are interned into StringId handles via StringPool. 
All containers become flat arrays (dense IDs), bitsets (set membership), sorted integer arrays (small sets), or arena slices (variable-length lists). The only hash table in the system is StringPool's internal Robin Hood index. Directory-name lookups use per-directory SortedPairVec arrays. NodeIds have flag bits in the high nibble, so IdArray dispatches across 4 per-type sub-arrays. + +**Tech Stack:** C++20, POSIX/Win32, Catch2 BDD tests, putup build system (Tupfiles). + +**Spec:** `docs/plans/custom-containers.md` + +--- + +## Chunk 1: Primitives + +### Task 1: IdBitSet + +**Files:** +- Create: `include/pup/core/id_bitset.hpp` +- Create: `src/core/id_bitset.cpp` +- Create: `test/unit/test_id_bitset.cpp` +- Modify: `Tupfile` — add `src/core/id_bitset.cpp` +- Modify: `test/unit/Tupfile` — add `test_id_bitset.cpp` + +- [ ] **Step 1: Write test** + +```cpp +// test/unit/test_id_bitset.cpp +#include "catch_amalgamated.hpp" +#include "pup/core/id_bitset.hpp" + +using pup::IdBitSet; + +TEST_CASE("IdBitSet basic operations", "[id_bitset]") +{ + auto bs = IdBitSet {}; + + SECTION("empty bitset") + { + REQUIRE(bs.count() == 0); + REQUIRE_FALSE(bs.contains(1)); + } + + SECTION("insert and contains") + { + bs.resize(100); + bs.insert(42); + REQUIRE(bs.contains(42)); + REQUIRE_FALSE(bs.contains(41)); + REQUIRE(bs.count() == 1); + } + + SECTION("remove") + { + bs.resize(100); + bs.insert(10); + bs.remove(10); + REQUIRE_FALSE(bs.contains(10)); + REQUIRE(bs.count() == 0); + } + + SECTION("clear") + { + bs.resize(100); + bs.insert(1); + bs.insert(50); + bs.insert(99); + bs.clear(); + REQUIRE(bs.count() == 0); + } + + SECTION("for_each iterates set bits") + { + bs.resize(200); + bs.insert(3); + bs.insert(100); + bs.insert(199); + auto collected = std::vector<uint32_t> {}; + bs.for_each([&](uint32_t id) { collected.push_back(id); }); + REQUIRE(collected == std::vector<uint32_t> { 3, 100, 199 }); + } + + SECTION("duplicate insert is idempotent") + { + bs.resize(10); + bs.insert(5); + bs.insert(5); + 
REQUIRE(bs.count() == 1); + } + + SECTION("boundary: id 0") + { + bs.resize(1); + bs.insert(0); + REQUIRE(bs.contains(0)); + } + + SECTION("boundary: id at word boundary") + { + bs.resize(128); + bs.insert(63); + bs.insert(64); + REQUIRE(bs.contains(63)); + REQUIRE(bs.contains(64)); + REQUIRE_FALSE(bs.contains(62)); + REQUIRE_FALSE(bs.contains(65)); + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `make && ./build/test/unit/putup_test "[id_bitset]" -v` +Expected: FAIL — `id_bitset.hpp` not found + +- [ ] **Step 3: Implement IdBitSet** + +```cpp +// include/pup/core/id_bitset.hpp +#pragma once + +#include <cstdint> +#include <functional> + +namespace pup { + +class IdBitSet { +public: + IdBitSet() = default; + ~IdBitSet(); + + IdBitSet(IdBitSet const&) = delete; + auto operator=(IdBitSet const&) -> IdBitSet& = delete; + IdBitSet(IdBitSet&&) noexcept; + auto operator=(IdBitSet&&) noexcept -> IdBitSet&; + + auto resize(std::uint32_t max_id) -> void; + auto insert(std::uint32_t id) -> void; + auto remove(std::uint32_t id) -> void; + [[nodiscard]] auto contains(std::uint32_t id) const -> bool; + auto clear() -> void; + [[nodiscard]] auto count() const -> std::size_t; + auto for_each(std::function<void(std::uint32_t)> const& fn) const -> void; + +private: + std::uint64_t* words_ = nullptr; + std::size_t word_count_ = 0; +}; + +} // namespace pup +``` + +```cpp +// src/core/id_bitset.cpp +#include "pup/core/id_bitset.hpp" +#include <cstdlib> +#include <cstring> + +namespace pup { + +IdBitSet::~IdBitSet() { std::free(words_); } + +IdBitSet::IdBitSet(IdBitSet&& o) noexcept + : words_(o.words_), word_count_(o.word_count_) +{ + o.words_ = nullptr; + o.word_count_ = 0; +} + +auto IdBitSet::operator=(IdBitSet&& o) noexcept -> IdBitSet& +{ + if (this != &o) { + std::free(words_); + words_ = o.words_; + word_count_ = o.word_count_; + o.words_ = nullptr; + o.word_count_ = 0; + } + return *this; +} + +auto IdBitSet::resize(std::uint32_t max_id) -> void +{ + auto needed = static_cast<std::size_t>((max_id + 64) / 64); + if (needed <= 
word_count_) return; + words_ = static_cast<std::uint64_t*>(std::realloc(words_, needed * sizeof(std::uint64_t))); + std::memset(words_ + word_count_, 0, (needed - word_count_) * sizeof(std::uint64_t)); + word_count_ = needed; +} + +auto IdBitSet::insert(std::uint32_t id) -> void +{ + auto w = id / 64; + if (w >= word_count_) resize(id); + words_[w] |= std::uint64_t { 1 } << (id % 64); +} + +auto IdBitSet::remove(std::uint32_t id) -> void +{ + auto w = id / 64; + if (w < word_count_) { + words_[w] &= ~(std::uint64_t { 1 } << (id % 64)); + } +} + +auto IdBitSet::contains(std::uint32_t id) const -> bool +{ + auto w = id / 64; + if (w >= word_count_) return false; + return (words_[w] & (std::uint64_t { 1 } << (id % 64))) != 0; +} + +auto IdBitSet::clear() -> void +{ + if (words_) std::memset(words_, 0, word_count_ * sizeof(std::uint64_t)); +} + +auto IdBitSet::count() const -> std::size_t +{ + auto n = std::size_t { 0 }; + for (std::size_t i = 0; i < word_count_; ++i) { + n += static_cast<std::size_t>(__builtin_popcountll(words_[i])); + } + return n; +} + +auto IdBitSet::for_each(std::function<void(std::uint32_t)> const& fn) const -> void +{ + for (std::size_t w = 0; w < word_count_; ++w) { + auto bits = words_[w]; + while (bits) { + auto bit = static_cast<std::uint32_t>(__builtin_ctzll(bits)); + fn(static_cast<std::uint32_t>(w * 64) + bit); + bits &= bits - 1; + } + } +} + +} // namespace pup +``` + +- [ ] **Step 4: Add to Tupfiles** + +In `Tupfile` after `srcs-y += src/core/hash.cpp`: +``` +srcs-y += src/core/id_bitset.cpp +``` + +In `test/unit/Tupfile` after `test-srcs-y += test_hash.cpp`: +``` +test-srcs-y += test_id_bitset.cpp +``` + +- [ ] **Step 5: Run test to verify it passes** + +Run: `make && ./build/test/unit/putup_test "[id_bitset]" -v` +Expected: All sections PASS + +- [ ] **Step 6: Run full test suite** + +Run: `make test` +Expected: All 349+ tests pass (no regressions) + +- [ ] **Step 7: Commit** + +```bash +git add include/pup/core/id_bitset.hpp src/core/id_bitset.cpp \ + test/unit/test_id_bitset.cpp Tupfile test/unit/Tupfile +git 
commit -m "Add IdBitSet primitive for dense ID set membership" +``` + +--- + +### Task 2: IdArray32 and IdArray64 + +**Files:** +- Create: `include/pup/core/id_array.hpp` +- Create: `src/core/id_array.cpp` +- Create: `test/unit/test_id_array.cpp` +- Modify: `Tupfile` — add `src/core/id_array.cpp` +- Modify: `test/unit/Tupfile` — add `test_id_array.cpp` + +- [ ] **Step 1: Write test** + +```cpp +// test/unit/test_id_array.cpp +#include "catch_amalgamated.hpp" +#include "pup/core/id_array.hpp" + +using pup::IdArray32; +using pup::IdArray64; + +TEST_CASE("IdArray32 basic operations", "[id_array]") +{ + auto arr = IdArray32 {}; + + SECTION("empty array") + { + REQUIRE_FALSE(arr.contains(1)); + } + + SECTION("set and get") + { + arr.resize(100); + arr.set(42, 0xDEAD); + REQUIRE(arr.contains(42)); + REQUIRE(arr.get(42) == 0xDEAD); + } + + SECTION("unset slot returns 0") + { + arr.resize(10); + REQUIRE(arr.get(5) == 0); + } + + SECTION("clear resets all") + { + arr.resize(10); + arr.set(3, 100); + arr.set(7, 200); + arr.clear(); + REQUIRE_FALSE(arr.contains(3)); + REQUIRE_FALSE(arr.contains(7)); + } + + SECTION("for_each iterates occupied slots") + { + arr.resize(100); + arr.set(10, 1); + arr.set(50, 2); + auto sum = std::uint32_t { 0 }; + arr.for_each([&](std::uint32_t id, std::uint32_t val) { sum += val; }); + REQUIRE(sum == 3); + } + + SECTION("set with value 0 is still present") + { + arr.resize(10); + arr.set(5, 0); + REQUIRE(arr.contains(5)); + } +} + +TEST_CASE("IdArray64 stores 64-bit values", "[id_array]") +{ + auto arr = IdArray64 {}; + arr.resize(10); + arr.set(3, 0xDEADBEEF'CAFEBABE); + REQUIRE(arr.get(3) == 0xDEADBEEF'CAFEBABE); +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `make && ./build/test/unit/putup_test "[id_array]" -v` +Expected: FAIL — `id_array.hpp` not found + +- [ ] **Step 3: Implement IdArray32 and IdArray64** + +`IdArray32`/`IdArray64` are flat `malloc`'d arrays with a parallel `IdBitSet` for presence tracking. 
Implementation in `.cpp` file. The header declares both concrete types (no template). + +Key design: `set(id, value)` automatically marks the bit in the parallel bitset. `contains(id)` checks the bitset (not the value — so value=0 is distinguishable from "not set"). + +```cpp +// include/pup/core/id_array.hpp +#pragma once + +#include "pup/core/id_bitset.hpp" +#include <cstdint> +#include <functional> + +namespace pup { + +class IdArray32 { +public: + IdArray32() = default; + ~IdArray32(); + IdArray32(IdArray32&&) noexcept; + auto operator=(IdArray32&&) noexcept -> IdArray32&; + IdArray32(IdArray32 const&) = delete; + auto operator=(IdArray32 const&) -> IdArray32& = delete; + + auto resize(std::uint32_t max_id) -> void; + auto set(std::uint32_t id, std::uint32_t value) -> void; + [[nodiscard]] auto get(std::uint32_t id) const -> std::uint32_t; + [[nodiscard]] auto contains(std::uint32_t id) const -> bool; + auto clear() -> void; + auto for_each(std::function<void(std::uint32_t, std::uint32_t)> const& fn) const -> void; + +private: + std::uint32_t* data_ = nullptr; + std::size_t capacity_ = 0; + IdBitSet present_; +}; + +class IdArray64 { +public: + IdArray64() = default; + ~IdArray64(); + IdArray64(IdArray64&&) noexcept; + auto operator=(IdArray64&&) noexcept -> IdArray64&; + IdArray64(IdArray64 const&) = delete; + auto operator=(IdArray64 const&) -> IdArray64& = delete; + + auto resize(std::uint32_t max_id) -> void; + auto set(std::uint32_t id, std::uint64_t value) -> void; + [[nodiscard]] auto get(std::uint32_t id) const -> std::uint64_t; + [[nodiscard]] auto contains(std::uint32_t id) const -> bool; + auto clear() -> void; + auto for_each(std::function<void(std::uint32_t, std::uint64_t)> const& fn) const -> void; + +private: + std::uint64_t* data_ = nullptr; + std::size_t capacity_ = 0; + IdBitSet present_; +}; + +} // namespace pup +``` + +Implementation in `src/core/id_array.cpp`: `malloc`/`realloc`/`free` for data array, delegates presence to `IdBitSet`. `set()` calls `present_.insert(id)`. `contains()` calls `present_.contains(id)`. 
`for_each()` calls `present_.for_each()` and reads `data_[id]`. + +- [ ] **Step 4: Add to Tupfiles, build, test** + +Run: `make && ./build/test/unit/putup_test "[id_array]" -v` +Expected: All sections PASS + +- [ ] **Step 5: Run full suite, commit** + +Run: `make test` + +```bash +git add include/pup/core/id_array.hpp src/core/id_array.cpp \ + test/unit/test_id_array.cpp Tupfile test/unit/Tupfile +git commit -m "Add IdArray32/IdArray64 for dense ID-indexed storage" +``` + +--- + +### Task 3: Arena32 + +**Files:** +- Create: `include/pup/core/arena.hpp` +- Create: `src/core/arena.cpp` +- Create: `test/unit/test_arena.cpp` +- Modify: `Tupfile`, `test/unit/Tupfile` + +- [ ] **Step 1: Write test** + +```cpp +// test/unit/test_arena.cpp +#include "catch_amalgamated.hpp" +#include "pup/core/arena.hpp" + +using pup::Arena32; +using pup::ArenaSlice; + +TEST_CASE("Arena32 basic operations", "[arena]") +{ + auto arena = Arena32 {}; + + SECTION("empty arena") + { + REQUIRE(arena.size() == 0); + } + + SECTION("append and get") + { + std::uint32_t vals[] = { 10, 20, 30 }; + auto slice = arena.append(vals, 3); + REQUIRE(slice.length == 3); + auto span = arena.get(slice); + REQUIRE(span[0] == 10); + REQUIRE(span[1] == 20); + REQUIRE(span[2] == 30); + } + + SECTION("multiple appends are contiguous") + { + std::uint32_t a[] = { 1, 2 }; + std::uint32_t b[] = { 3, 4, 5 }; + auto sa = arena.append(a, 2); + auto sb = arena.append(b, 3); + REQUIRE(sb.offset == sa.offset + sa.length); + REQUIRE(arena.size() == 5); + } + + SECTION("empty append") + { + auto slice = arena.append(nullptr, 0); + REQUIRE(slice.length == 0); + } + + SECTION("compact shrinks to fit") + { + arena.reserve(1000); + std::uint32_t v[] = { 42 }; + arena.append(v, 1); + arena.compact(); + REQUIRE(arena.size() == 1); + REQUIRE(arena.get(ArenaSlice { 0, 1 })[0] == 42); + } +} +``` + +- [ ] **Step 2: Run test, verify fail** +- [ ] **Step 3: Implement Arena32** + +`ArenaSlice` is a simple struct `{uint32_t offset, 
uint32_t length}`. `Arena32` owns a `malloc`'d `uint32_t` buffer. `append()` copies values and returns the slice. `get()` returns a pointer+length. `reserve()` pre-allocates. `compact()` reallocs to exact size. + +- [ ] **Step 4: Add to Tupfiles, build, test** +- [ ] **Step 5: Full suite, commit** + +```bash +git commit -m "Add Arena32 for append-only variable-length integer lists" +``` + +--- + +### Task 4: SortedIdVec and SortedPairVec + +**Files:** +- Create: `include/pup/core/sorted_id_vec.hpp` +- Create: `src/core/sorted_id_vec.cpp` +- Create: `test/unit/test_sorted_id_vec.cpp` +- Modify: `Tupfile`, `test/unit/Tupfile` + +- [ ] **Step 1: Write test** + +```cpp +// test/unit/test_sorted_id_vec.cpp +#include "catch_amalgamated.hpp" +#include "pup/core/sorted_id_vec.hpp" + +using pup::SortedIdVec; +using pup::SortedPairVec; + +TEST_CASE("SortedIdVec basic operations", "[sorted_id_vec]") +{ + auto vec = SortedIdVec {}; + + SECTION("insert maintains sorted order") + { + vec.insert(30); + vec.insert(10); + vec.insert(20); + REQUIRE(vec.size() == 3); + auto collected = std::vector {}; + vec.for_each([&](std::uint32_t id) { collected.push_back(id); }); + REQUIRE(collected == std::vector { 10, 20, 30 }); + } + + SECTION("duplicate insert is idempotent") + { + vec.insert(5); + vec.insert(5); + REQUIRE(vec.size() == 1); + } + + SECTION("contains") + { + vec.insert(42); + REQUIRE(vec.contains(42)); + REQUIRE_FALSE(vec.contains(41)); + } + + SECTION("remove") + { + vec.insert(1); + vec.insert(2); + vec.insert(3); + vec.remove(2); + REQUIRE(vec.size() == 2); + REQUIRE_FALSE(vec.contains(2)); + REQUIRE(vec.contains(1)); + REQUIRE(vec.contains(3)); + } +} + +TEST_CASE("SortedPairVec basic operations", "[sorted_id_vec]") +{ + auto vec = SortedPairVec {}; + + SECTION("insert and find") + { + vec.insert(10, 100); + vec.insert(5, 50); + vec.insert(15, 150); + REQUIRE(vec.find(10) != nullptr); + REQUIRE(*vec.find(10) == 100); + REQUIRE(*vec.find(5) == 50); + REQUIRE(vec.find(99) 
== nullptr); + } + + SECTION("insert overwrites existing key") + { + vec.insert(10, 100); + vec.insert(10, 200); + REQUIRE(vec.size() == 1); + REQUIRE(*vec.find(10) == 200); + } + + SECTION("remove") + { + vec.insert(1, 10); + vec.insert(2, 20); + vec.remove(1); + REQUIRE(vec.find(1) == nullptr); + REQUIRE(*vec.find(2) == 20); + } +} +``` + +- [ ] **Step 2: Run test, verify fail** +- [ ] **Step 3: Implement** + +`SortedIdVec`: `malloc`'d `uint32_t` array, hand-written binary search. Insert uses `memmove` to maintain order. + +`SortedPairVec`: `malloc`'d array of `{uint32_t key, uint32_t value}` pairs, sorted by key. Binary search on key. `find()` returns pointer to value or `nullptr`. + +- [ ] **Step 4: Build, test, full suite, commit** + +```bash +git commit -m "Add SortedIdVec and SortedPairVec for small sorted integer sets" +``` + +--- + +### Task 5: StringPool Robin Hood index + +**Files:** +- Modify: `include/pup/core/string_pool.hpp` +- Modify: `src/core/string_pool.cpp` +- Modify: `test/unit/test_string_pool.cpp` (if exists, else the existing tests cover it via other test files) + +- [ ] **Step 1: Write stress test for Robin Hood index** + +Add to existing string pool tests (or create `test_string_pool.cpp`): + +```cpp +TEST_CASE("StringPool Robin Hood index", "[string_pool]") +{ + auto pool = pup::StringPool {}; + + SECTION("intern and find 10K strings") + { + for (auto i = 0; i < 10000; ++i) { + auto s = "var_" + std::to_string(i); + auto id = pool.intern(s); + REQUIRE(pool.get(id) == s); + REQUIRE(pool.find(s) == id); + } + REQUIRE(pool.size() == 10000); + } + + SECTION("deduplication under load") + { + for (auto i = 0; i < 1000; ++i) { + pool.intern("same_string"); + } + REQUIRE(pool.size() == 1); + } +} +``` + +- [ ] **Step 2: Run existing tests to establish baseline** + +Run: `make test` +Expected: All pass (baseline) + +- [ ] **Step 3: Replace unordered_map index with Robin Hood** + +In `string_pool.cpp`, replace `std::unordered_map` with an internal 
Robin Hood hash table. The Robin Hood table: + +- Uses FNV-1a hash on string bytes +- Stores `{uint32_t hash, uint16_t displacement}` metadata per slot +- Stores `StringId` values per slot (the string_view key is reconstructed from `storage_[to_underlying(id) - 1]`) +- Key comparison: compute FNV-1a of probe key, compare hash first, then compare string content via `storage_` lookup +- Sentinel: hash=0 means empty, hash=1 means tombstone. `fix_hash()` remaps real hashes of 0/1 to 2/3. +- Growth: double at 80% load factor +- No `<unordered_map>` include needed + +In `string_pool.hpp`, remove `#include <unordered_map>`. Replace private member with: +```cpp +private: + std::deque storage_; + // Robin Hood index (inline — no separate type) + struct Meta { std::uint32_t hash; std::uint16_t displacement; }; + Meta* index_meta_ = nullptr; + StringId* index_values_ = nullptr; + std::size_t index_capacity_ = 0; + std::size_t index_count_ = 0; +``` + +- [ ] **Step 4: Run tests** + +Run: `make && make test` +Expected: All tests pass (including the new stress test) + +- [ ] **Step 5: Commit** + +```bash +git commit -m "Replace StringPool unordered_map with Robin Hood index + +The only hash table in the system — specialized for string_view +keys with FNV-1a hash. Removes <unordered_map> from string_pool.hpp." +``` + +--- + +## Chunk 2: Graph Migration + +### Task 6: NodeIdMap wrapper + +**Files:** +- Create: `include/pup/core/node_id_map.hpp` + +NodeIdMap provides a single `get(NodeId)`/`set(NodeId, value)` API over 4 per-type IdArrays, dispatching on `node_id::is_command()`, `is_condition()`, `is_phi()`, else file. 
+ +- [ ] **Step 1: Write test** + +```cpp +// test/unit/test_node_id_map.cpp +#include "catch_amalgamated.hpp" +#include "pup/core/node_id_map.hpp" +#include "pup/core/types.hpp" + +using pup::NodeIdMap32; + +TEST_CASE("NodeIdMap32 dispatches by node type", "[node_id_map]") +{ + auto map = NodeIdMap32 {}; + map.resize_files(100); + map.resize_commands(50); + + auto file_id = pup::NodeId { 5 }; + auto cmd_id = pup::node_id::make_command(3); + + SECTION("file and command slots are independent") + { + map.set(file_id, 111); + map.set(cmd_id, 222); + REQUIRE(map.get(file_id) == 111); + REQUIRE(map.get(cmd_id) == 222); + } + + SECTION("contains checks correct sub-array") + { + map.set(file_id, 1); + REQUIRE(map.contains(file_id)); + REQUIRE_FALSE(map.contains(cmd_id)); + } +} +``` + +- [ ] **Step 2: Implement NodeIdMap32/NodeIdMap64** + +Header-only wrapper. Holds 4 `IdArray32` (or `IdArray64`) members. `set()`/`get()`/`contains()` dispatch on `node_id::is_command()` etc., then call `sub_array.set(node_id::index(id), value)`. + +```cpp +// include/pup/core/node_id_map.hpp +#pragma once +#include "pup/core/id_array.hpp" +#include "pup/core/types.hpp" + +namespace pup { + +class NodeIdMap32 { +public: + auto resize_files(std::uint32_t max_idx) -> void { files_.resize(max_idx); } + auto resize_commands(std::uint32_t max_idx) -> void { cmds_.resize(max_idx); } + auto resize_conditions(std::uint32_t max_idx) -> void { conds_.resize(max_idx); } + auto resize_phis(std::uint32_t max_idx) -> void { phis_.resize(max_idx); } + + auto set(NodeId id, std::uint32_t value) -> void; + [[nodiscard]] auto get(NodeId id) const -> std::uint32_t; + [[nodiscard]] auto contains(NodeId id) const -> bool; + auto clear() -> void; + +private: + IdArray32 files_, cmds_, conds_, phis_; +}; + +// Same pattern for NodeIdMap64 + +} // namespace pup +``` + +Implementation dispatches on `node_id::is_command(id)` etc. 
+ +- [ ] **Step 3: Build, test, commit** + +```bash +git commit -m "Add NodeIdMap32/64 wrapper dispatching by node type flags" +``` + +--- + +### Task 7: Migrate Graph edge indices + +**Files:** +- Modify: `include/pup/graph/dag.hpp` — replace 4 edge index `unordered_map`s with `NodeIdMap64` + `Arena32` +- Modify: `src/graph/dag.cpp` — update `add_edge()`, `build_edge_indices()`, edge lookup functions, `get_inputs()`, `get_outputs()`, `edges_to()`, `edges_from()` +- Modify: `src/graph/builder.cpp` — update callers of edge lookup functions +- Modify: `src/graph/topo.cpp` — update callers of edge traversal +- Modify: `src/exec/scheduler.cpp` — update job dependency resolution +- Modify: `src/cli/cmd_build.cpp` — update upstream node walk +- Modify: `src/cli/cmd_show.cpp` — update node display/inspection + +**IMPORTANT:** Changing edge lookup return types (from `vector` to a span/view over arena data) affects ALL consumers. All files above must be updated in the SAME commit or the build will break. + +This is the highest-impact change. The 4 edge index maps become: + +```cpp +// In Graph struct: +Arena32 edge_arena; // shared arena for all edge lists +NodeIdMap64 edges_to_index; // NodeId → ArenaSlice (indices into edges vector) +NodeIdMap64 edges_from_index; +NodeIdMap64 order_only_to; // NodeId → ArenaSlice (NodeId lists) +NodeIdMap64 order_only_deps; +``` + +- [ ] **Step 1: Replace edge index maps in dag.hpp** + +Change the 4 `std::unordered_map>` members to `NodeIdMap64` + shared `Arena32`. Keep the `std::vector edges` central storage (Arena is for the index, not the edges themselves). 
+ +- [ ] **Step 2: Write test for arena-backed edge indices** + +```cpp +TEST_CASE("Graph edge indices use Arena32", "[dag]") +{ + // Build a small graph with known edges + // Call build_edge_indices() + // Verify edges_to(node) and edges_from(node) return correct data + // Verify order_only lookups work +} +``` + +- [ ] **Step 3: Update build_edge_indices() in dag.cpp** + +`build_edge_indices()` currently iterates `edges` and populates the maps. Rewrite to: +1. Count edges per node (first pass) +2. Resize NodeIdMap64 arrays for all 4 node types +3. Allocate Arena slices per node +4. Fill Arena slices (second pass) + +- [ ] **Step 4: Update edge lookup functions and ALL consumers** + +Functions like `edges_to(NodeId)`, `edges_from(NodeId)` change return type. Update every file listed above in the same pass. + +- [ ] **Step 5: Build, test full suite** + +Run: `make && make test` +Expected: All 349+ tests pass + +- [ ] **Step 6: Commit** + +```bash +git commit -m "Migrate graph edge indices to NodeIdMap64 + Arena32 + +Replace 4 unordered_map> with NodeIdMap64 +storing ArenaSlice references into a shared Arena32." +``` + +--- + +### Task 8: Migrate DirNameKey to per-directory SortedPairVec ✅ + +**Completed.** Replaced `unordered_map` with `std::vector dir_children` indexed by parent directory. Removed 4 types (`DirNameKey`, `DirNameKeyView`, `DirNameKeyHash`, `DirNameKeyEqual`). Modified only `dag.hpp` and `dag.cpp` (no builder.cpp changes needed — public API unchanged). + +**Deviation from plan:** Skipped the two-phase compact-into-Arena32 step. `find_by_dir_name()` is called interleaved with `add_file_node()` during construction, so there's no clean construction/read-only boundary. `SortedPairVec` is already a contiguous array with binary search — cache-friendly enough for 5-30 children per directory. 
+ + +--- + +### Task 9: Migrate topo.cpp + +**Files:** +- Modify: `src/graph/topo.cpp` — replace `unordered_map<NodeId, Color>` and `unordered_map<NodeId, NodeId>` + +Replace with `NodeIdMap32` for color tracking and parent pointers. Initialize with `resize_files(graph.next_file_id)`, `resize_commands(node_id::index(graph.next_command_id))`, etc. + +- [ ] **Step 1: Replace DfsState maps with NodeIdMap32** +- [ ] **Step 2: Build, test `[topo]` and full suite** +- [ ] **Step 3: Commit** + +```bash +git commit -m "Migrate topo.cpp from unordered_map to NodeIdMap32" +``` + +--- + +## Chunk 3: Builder, Evaluator, Scheduler, CLI + +### Task 10: Migrate CommandNode vectors to ArenaSlice + +**Files:** +- Modify: `include/pup/graph/dag.hpp` — change `CommandNode::inputs`/`outputs` from `vector<NodeId>` to `ArenaSlice` +- Modify: `src/graph/dag.cpp` — update `expand_instruction()` and other functions reading inputs/outputs +- Modify: `src/graph/builder.cpp` — update code that populates inputs/outputs during graph building +- Modify: `src/exec/scheduler.cpp` — update job creation (reads inputs/outputs for BuildJob) +- Modify: `src/cli/cmd_show.cpp` — update node display +- Modify: `src/cli/cmd_build.cpp` — update upstream walk, implicit dep handling +- Modify: `include/pup/index/entry.hpp` — update CommandEntry serialization (reads inputs/outputs) +- Modify: `src/index/entry.cpp` — update CommandEntry round-trip code + +This is a pervasive change. The inputs/outputs are read from the Arena via `graph.edge_arena.get(cmd.inputs)`. 
+ +- [ ] **Step 1: Write test for ArenaSlice-backed command operands** + +```cpp +TEST_CASE("CommandNode operands via ArenaSlice", "[dag]") +{ + // Build a graph with known command inputs/outputs + // Verify Arena-backed access returns correct NodeIds +} +``` + +- [ ] **Step 2: Modify CommandNode struct and ALL consumers atomically** +- [ ] **Step 3: Build, test full suite, commit** + +--- + +### Task 11: Migrate VarDb to SortedPairVec + +**Files:** +- Modify: `include/pup/parser/eval.hpp` — change VarDb internals, add `StringPool*` member +- Modify: `src/parser/eval.cpp` — update set/get/contains/append/names/clear + +Replace `unordered_map>` with `SortedPairVec` storing `(StringId, StringId)` pairs. VarDb gets a `StringPool*` member. + +The API remains `string_view` at the boundary. Internally: +- `set(name, value)`: intern both, `pair_vec.insert(name_id, value_id)` +- `get(name)`: `pool_->find(name)` → `pair_vec.find(name_id)` → `pool_->get(value_id)` +- `contains(name)`: `pool_->find(name)` → `pair_vec.find(name_id) != nullptr` + +- [ ] **Step 1: Write test for interned VarDb** + +```cpp +TEST_CASE("VarDb with StringPool interning", "[eval]") +{ + auto pool = pup::StringPool {}; + auto db = pup::parser::VarDb { &pool }; + + SECTION("set and get") + { + db.set("CC", "gcc"); + REQUIRE(db.get("CC") == "gcc"); + } + + SECTION("overwrite") + { + db.set("CC", "gcc"); + db.set("CC", "clang"); + REQUIRE(db.get("CC") == "clang"); + } + + SECTION("contains") + { + db.set("FOO", "bar"); + REQUIRE(db.contains("FOO")); + REQUIRE_FALSE(db.contains("BAR")); + } + + SECTION("get missing returns empty") + { + REQUIRE(db.get("NOPE") == ""); + } +} +``` + +- [ ] **Step 2: Implement, build, test full suite, commit** + +--- + +### Task 12: Migrate builder maps + +**Files:** +- Modify: `include/pup/graph/builder.hpp` — change macro maps, group maps, config/env var tracking +- Modify: `src/graph/builder.cpp` +- Modify: `include/pup/parser/eval.hpp` — update 
`EvalContext::var_config_deps`/`var_env_deps` pointer types to match new container types in `BuilderState` + +Replace: +- `unordered_map` → intern macro names, use `SortedPairVec` or `IdArray32` (BangMacroDef needs decomposition into StringIds + ArenaSlices) +- `unordered_map>` (groups) → page-table: `IdArray64[group_name_id] → ArenaSlice` +- `unordered_set` (included_files) → `IdBitSet` on interned StringIds +- `set` (exported_vars, used_config_vars, etc.) → `SortedIdVec` +- `unordered_map` → concatenate + intern key, use `IdArray32` +- `unordered_map` (config/env var nodes) → `SortedPairVec` +- `unordered_map>` (var deps) → `IdArray64[name_id] → ArenaSlice` of dep StringIds + +This is the largest task. Break into sub-commits per map type. + +- [ ] **Steps: Modify per map, build, test, commit after each logical group** + +--- + +### Task 13: Migrate scheduler and CLI + +**Files:** +- Modify: `src/exec/scheduler.cpp` — replace `set` with `IdBitSet`, `unordered_map` with `SortedPairVec` +- Modify: `src/cli/cmd_build.cpp` — replace `set`, `set`, `unordered_map` +- Modify: `src/cli/cmd_clean.cpp`, `src/cli/cmd_show.cpp`, etc. + +Mechanical migration: same patterns as builder. + +- [ ] **Steps: Modify, build, test, commit** + +--- + +### Task 14: Migrate remaining set instances + +**Files:** +- Modify: `include/pup/exec/scheduler.hpp` — `BuildJob::exported_vars` from `set` to `SortedIdVec` +- Modify: `include/pup/graph/builder.hpp` — remaining `set` members +- Modify: `include/pup/parser/eval.hpp` — `imported_vars` from `unordered_set` to `IdBitSet` on interned IDs + +- [ ] **Steps: Modify, build, test, commit** + +--- + +### Task 15: Migrate PathCache ✅ + +**Completed.** Replaced `using PathCache = unordered_map` with `struct PathCache { NodeIdMap32 ids; StringPool pool; }`. PathCache owns its own StringPool because `get_full_path` takes `Graph const&` (can't intern into `Graph::strings`). 
Modified only `dag.hpp` and `dag.cpp` — no caller changes needed (all access through free functions). cmd_build.cpp did not need changes (false positive in original plan). + +--- + +### Task 16: Final cleanup + +**Files:** +- All production files in `src/` and `include/` + +- [ ] **Step 1: Verify no STL container headers remain** + +```bash +grep -rn '<unordered_map>\|<unordered_set>\|<set>' src/ include/ +``` + +Expected: zero hits (except possibly `<set>` if `std::set` is kept) + +- [ ] **Step 2: Remove string_hash.hpp if unused** + +```bash +grep -rn 'string_hash.hpp' src/ include/ +``` + +If only included by files that no longer use it, remove the header. + +- [ ] **Step 3: Full test suite** + +Run: `make && make test` +Expected: All tests pass + +- [ ] **Step 4: Binary size check** + +```bash +size build/putup +``` + +Expected: `.text` decreases from 927 KB + +- [ ] **Step 5: Commit** + +```bash +git commit -m "Remove <unordered_map>, <unordered_set>, <set> from production code + +Custom containers migration complete. All maps/sets replaced with +IdArray, IdBitSet, Arena, SortedIdVec/PairVec, and page-table lookups. +The only hash table is StringPool's internal Robin Hood index." +``` + +--- + +## Execution Notes + +**NodeId flag bits:** NodeIds encode type in high bits (bit 31=command, 30=condition, 29=phi). `node_id::index(id)` strips flags to get the per-type array index. `NodeIdMap32`/`NodeIdMap64` dispatch to the correct sub-array. + +**NodeIdMap resize convention:** `resize_files(n)` takes raw NodeId (no flags), so pass `graph.next_file_id`. `resize_commands(n)`, `resize_conditions(n)`, `resize_phis(n)` take the stripped index, so pass `node_id::index(graph.next_command_id)`, etc. The `set(NodeId, value)` method always calls `node_id::index()` internally. + +**StringPool threading:** StringPool must be accessible from all modules that intern strings. During graph construction, a single `StringPool*` is passed through `BuilderContext`. For VarDb, the pool reference is set at construction time. 
+ +**BangMacroDef decomposition:** This struct contains `Expression` members (AST nodes) that reference strings. Full interning of BangMacroDef requires the AST to use StringId instead of std::string. This may be deferred if the AST migration is too invasive — in that case, keep `unordered_map` temporarily and note it as remaining tech debt. + +**DeferredOrderOnlyEdge:** The `set` in builder.hpp is small, transient, and uses a composite comparator. Keep as `std::set` until a later pass — it's not worth a custom container for <100 elements with a two-field comparison. + +**`set>`** in cmd_build.cpp: Keep as `std::set` — small, transient, composite key. Not worth a custom container. + +**PathCache:** `using PathCache = unordered_map` in dag.hpp. Migrate to `NodeIdMap32` mapping `NodeId → StringId` (intern the path string). Must be addressed before Task 16 cleanup. + +**Dependency DAG:** +``` +Task 1 (IdBitSet) ─┐ +Task 2 (IdArray) ───────────┤─→ Task 6 (NodeIdMap) ─→ Task 7 (edge indices) ─→ Task 10 (CommandNode ArenaSlice) +Task 3 (Arena) ─────────────┤ ─→ Task 8 (DirNameKey) + │ ─→ Task 9 (topo.cpp) +Task 4 (SortedIdVec) ───────┤─→ Task 11 (VarDb) +Task 5 (StringPool RH) ─────┤─→ Task 12 (builder maps) + │─→ Task 13 (scheduler + CLI) + │─→ Task 14 (remaining sets) + └─→ Task 16 (cleanup) [depends on ALL] +``` + +Tasks 11-14 do NOT depend on Tasks 6-9. They depend on primitives (1-5) directly. diff --git a/docs/plans/custom-containers.md b/docs/plans/custom-containers.md new file mode 100644 index 0000000..85922c1 --- /dev/null +++ b/docs/plans/custom-containers.md @@ -0,0 +1,289 @@ +# Custom Containers — Design Spec + +## Goal + +Replace `std::unordered_map`, `std::unordered_set`, `std::set`, and eventually `std::vector` with tailored primitives. Part of the `-nostdlib++` trajectory. + +## Core Insight + +If all strings are interned, every key and value in the codebase reduces to a fixed-width integer. The container zoo collapses to 3 primitives + a string pool. 
+ +| Current Type | After Interning | Width | +|---|---|---| +| `std::string` (as key or value) | `StringId` | 32-bit | +| `NodeId` | `NodeId` | 32-bit | +| `DirNameKey` (parent_dir, name) | per-directory `SortedPairVec` | — | +| `Color` (enum) | `uint8_t` (pad to 32) | 32-bit | +| `vector` (edges, inputs) | arena slice `{offset, len}` | 64-bit | +| `set` | sorted `StringId[]` | 32-bit × N | + +## Primitives + +| Primitive | Mechanism | Replaces | +|---|---|---| +| **IdArray** | flat array indexed by dense ID | all `unordered_map` | +| **IdBitSet** | bitset indexed by dense ID | all `set`, `unordered_set` | +| **Arena** | append-only pool + `{offset, len}` slices | `vector`, edge lists, operands | +| **SortedIdVec** | sorted array of integer IDs | small `set`, `set` | +| **StringPool** | the one string↔integer bridge | string interning (already exists) | + +No general-purpose hash table outside of `StringPool`. The directory-name reverse lookup uses per-directory `SortedPairVec` arrays indexed by parent directory ID. + +## Design Principles + +**Tailored, not templated.** No C++ templates. Each primitive has concrete implementations per element width (32 or 64 bit). + +**Raw memory, not std::vector.** Primitives own memory via `malloc`/`realloc`/`free`. This avoids circular dependency when `std::vector` is later replaced. + +**Two-phase lifecycle.** `reserve(n)` for upfront allocation, `compact()` for right-sizing after population. Fits putup's build-once-then-query pattern. + +**Pervasive interning.** All string storage goes through `StringPool`. Variables, paths, commands, display strings — everything becomes a `StringId`. External strings enter the system through `intern()` at the boundary and leave through `get()` at output. + +**Dense over sparse.** Prefer flat arrays (O(1), zero overhead) over hash tables. 
The only hash table in the system is `StringPool`'s internal index — and even that is a specialized Robin Hood table for `string_view → StringId`, not a general-purpose container. + +## Primitive Designs + +### StringPool (existing — to be expanded) + +Already exists (`core/string_pool.hpp`). Currently used only for `Graph::strings`. Must become the universal interning layer. + +**Expansion:** All string-owning maps (`VarDb`, macro names, group names, env var names, file paths) migrate to `StringId` references into a shared pool. + +**Internal migration:** Replace `unordered_map` index with a Robin Hood table. This is the ONE hash table in the system — specialized for `string_view` keys with `fnv1a` hash. Implemented directly in `string_pool.cpp`. + +**File:** `core/string_pool.{hpp,cpp}` (modified) + +### IdArray + +Flat array indexed by dense integer ID. O(1) read/write. + +``` +capacity: N +data: [slot_0, slot_1, slot_2, ..., slot_N-1] +present: IdBitSet tracking which slots are occupied +``` + +Each slot holds a fixed-width value. Unoccupied slots are zero-initialized. The parallel `IdBitSet` distinguishes "not present" from "present with value 0". + +Two concrete types: +- `IdArray32` — 32-bit values (NodeId, StringId, Color, counts) +- `IdArray64` — 64-bit values (arena slices `{offset:32, len:32}`) + +**Operations:** `set(id, value)`, `get(id) -> value`, `contains(id) -> bool`, `resize(max_id)`, `clear()`, `for_each(callback)`. + +**Implementation:** `malloc`/`realloc`. No constructors/destructors — values are plain integers. + +**File:** `core/id_array.{hpp,cpp}` + +### IdBitSet + +Bitset with capacity equal to max ID. O(1) insert/test, cache-friendly iteration. + +``` +word[0] = bits 0..63 +word[1] = bits 64..127 +... +``` + +**Operations:** `insert(id)`, `contains(id)`, `remove(id)`, `clear()`, `count()`, `for_each(callback)`, `resize(max_id)`. + +**Implementation:** `malloc`/`realloc` for `uint64_t` word array. Concrete type. 
+ +**File:** `core/id_bitset.{hpp,cpp}` + +### Arena + +Append-only pool of integer values. Variable-length lists per entity are stored as `{offset, length}` slice references. + +``` +data: [a₀, a₁, a₂, b₀, b₁, c₀, c₁, c₂, c₃, ...] + ↑ list A ↑ list B ↑ list C +``` + +```cpp +struct ArenaSlice { + uint32_t offset; + uint32_t length; +}; +``` + +An `IdArray64` stores `ArenaSlice` values per entity — e.g., `edges_to[node_id] = {offset=5, length=3}`. + +**Operations:** +- `append(values...) -> ArenaSlice` — add a list, return its slice +- `get(slice) -> {ptr, len}` — access a stored list +- `reserve(total_elements)` — pre-allocate +- `compact()` — shrink to fit + +**Implementation:** Single `malloc`/`realloc` buffer of `uint32_t`. Append-only during build, immutable after compact. + +**File:** `core/arena.{hpp,cpp}` + +### SortedIdVec + +Sorted array of 32-bit IDs with hand-written binary search. For small sparse sets (<100 elements). + +**Operations:** `insert(id)`, `contains(id)`, `remove(id)`, `clear()`, `size()`, range-for. + +**Implementation:** `malloc`/`realloc`. Binary search for lookup. `memmove` to maintain sorted order on insert. + +**Replaces:** `set` (exported_vars), `set` (after interning), small `unordered_set`. + +**File:** `core/sorted_id_vec.{hpp,cpp}` + +## DirNameKey: Per-Directory SortedPairVec ✅ + +**Implemented.** The `dir_name_index` mapped `(parent_dir, name) → NodeId`. Replaced with `std::vector dir_children` indexed by `node_id::index(parent_dir)`. Each directory gets its own sorted array of `(StringId, NodeId)` pairs. Lookup is O(1) dispatch to parent + O(log M) binary search (M = children per directory, typically 5-30). + +The original design proposed a two-phase approach (SortedPairVec during construction, then compact into Arena32). Skipped compaction because `find_by_dir_name()` is called interleaved with `add_file_node()` — no clean boundary — and SortedPairVec is already contiguous and cache-friendly at these sizes. 
+ +**File:** Integrated into `graph/dag.{hpp,cpp}` + +## GroupKey Migration + +`unordered_map` where `GroupKey = (directory, name)`. Group names are identifiers that never contain `/`. + +After interning, `directory` becomes a `StringId` and `name` becomes a `StringId`. Use the same per-directory SortedPairVec approach: + +``` +Level 1: IdArray64[dir_string_id] → ArenaSlice into Level 2 +Level 2: Arena of sorted (StringId, NodeId) pairs per directory +``` + +Or simpler: concatenate `directory + "/" + name` (safe because group names are identifiers with no slashes), intern the result, and use `IdArray32[concatenated_string_id] → NodeId`. + +## VarDb Migration + +`VarDb` currently stores `unordered_map`. After interning: + +```cpp +class VarDb { + IdArray32 vars_; // StringId(name) → StringId(value) + StringPool* pool_; +public: + auto set(std::string_view name, std::string_view value) -> void; + auto get(std::string_view name) const -> std::string_view; + auto contains(std::string_view name) const -> bool; +}; +``` + +Wait — `IdArray32` requires dense keys. StringIds are dense (sequential from the pool). But VarDb typically has ~100-1000 variables while the StringPool may have ~10,000 interned strings. The IdArray would be 10,000 slots with 100 occupied — 99% waste. + +**Better approach:** VarDb uses a `SortedIdVec`-style structure: sorted array of `(StringId, StringId)` pairs. Binary search on the name StringId. For ~100-1000 variables, binary search is ~10 comparisons — fast enough. Insert maintains sorted order via memmove. + +```cpp +class VarDb { + uint32_t* data_; // interleaved [name₀, value₀, name₁, value₁, ...] + size_t count_; + size_t capacity_; + StringPool* pool_; +}; +``` + +Or introduce a **SortedPairVec** (sorted array of `(uint32_t key, uint32_t value)` pairs). + +## Graph Node Migration + +`FileNode` and `CommandNode` currently store `std::string` members. 
After interning: + +```cpp +struct FileNode { + NodeId id; + StringId name; // was: std::string name + StringId dir_name; // was: std::string dir_name + NodeType type; + // ... +}; + +struct CommandNode { + NodeId id; + StringId command; // was: std::string command + StringId display; // was: std::string display + ArenaSlice inputs; // was: std::vector inputs + ArenaSlice outputs; // was: std::vector outputs + // ... +}; +``` + +Edge lists become `ArenaSlice` references into a shared `Arena32`. This replaces both per-node `vector` allocations and the `unordered_map>` edge index. + +## Remaining `std::set` Instances + +| Instance | Location | Migration | +|---|---|---| +| `set` (exported_vars, config_vars, etc.) | builder.hpp, eval.hpp | `SortedIdVec` after interning | +| `set` (CommandNode::exported_vars) | dag.hpp | `SortedIdVec` | +| `set` | builder.hpp | Keep as-is (transient, <100 elements, composite comparator) | +| `set>` | cmd_build.cpp | `IdBitSet` on packed `(a<<32\|b)`, or keep as-is if count is small | + +## Remaining `std::unordered_*` Instances + +| Instance | Location | Migration | +|---|---|---| +| `unordered_map` (VarDb) | eval.hpp | SortedPairVec (StringId→StringId) | +| `unordered_map` | builder.hpp | Intern key → SortedPairVec or IdArray (BangMacroDef becomes a struct of StringIds + ArenaSlices) | +| `unordered_map>` | builder.hpp (groups) | Page-table: IdArray64[group_name_id] → ArenaSlice | +| `unordered_map` | builder.hpp | Page-table or concatenated StringId | +| `unordered_map` | builder.hpp (config/env var nodes) | IdArray32[interned_name] → NodeId | +| `unordered_map>` | builder.hpp (var deps) | IdArray64[name_id] → ArenaSlice of dep StringIds | +| `unordered_set` | builder.hpp, eval.hpp | SortedIdVec or IdBitSet | +| `unordered_map` | string_pool.hpp | Internal Robin Hood (the one hash table) | +| `unordered_map` (env cache) | scheduler.cpp | SortedPairVec | +| `unordered_map` | cmd_build.cpp | IdArray32 or SortedPairVec | +| 
`unordered_set` | scheduler.cpp | IdBitSet (job indices are dense) | + +## Migration Order + +### Step 1: Primitives +Build and test all primitives in isolation. No production code changes yet. +- `IdArray32`, `IdArray64` +- `IdBitSet` +- `Arena32` +- `SortedIdVec` +- `SortedPairVec` (sorted `(uint32_t, uint32_t)` pairs) +- `StringPool` internal Robin Hood index + +### Step 2: Graph internals +Migrate `dag.hpp` node types to `StringId` + `ArenaSlice`. Replace edge index maps with `IdArray64`. Replace `dir_name_index` with per-directory `SortedPairVec` ✅. Replace traversal sets with `IdBitSet`. This is the highest-impact change. + +### Step 3: Builder + evaluator +Migrate `VarDb` to `SortedPairVec`. Migrate macro maps, group maps, config/env var tracking. Expand `StringPool` to cover all string storage in builder context. + +### Step 4: Scheduler + CLI +Migrate remaining maps in scheduler and CLI commands. Replace remaining `set` with `SortedIdVec`. + +### Step 5: Cleanup +Remove `<unordered_map>`, `<unordered_set>`, `<set>` from all production includes. Remove `core/string_hash.hpp`. Verify zero STL container headers remain. 
+ +## Files + +| File | Content | +|---|---| +| `include/pup/core/id_array.hpp` | IdArray32, IdArray64 | +| `src/core/id_array.cpp` | Implementation | +| `include/pup/core/id_bitset.hpp` | IdBitSet | +| `src/core/id_bitset.cpp` | Implementation | +| `include/pup/core/arena.hpp` | Arena32, ArenaSlice | +| `src/core/arena.cpp` | Implementation | +| `include/pup/core/sorted_id_vec.hpp` | SortedIdVec, SortedPairVec | +| `src/core/sorted_id_vec.cpp` | Implementation | +| `core/string_pool.{hpp,cpp}` | Modified — expanded + internal Robin Hood index | + +## Testing + +Each primitive gets its own test file: +- `test_id_array.cpp` — dense access, out-of-range, parallel bitset tracking +- `test_id_bitset.cpp` — insert/contains/remove, clear, count, for_each, resize +- `test_arena.cpp` — append, get slice, reserve, compact +- `test_sorted_id_vec.cpp` — ordering, duplicate insert, contains, remove, binary search +- `test_string_pool.cpp` — expanded: Robin Hood index, large-scale interning + +## Success Criteria + +- Zero `<unordered_map>`, `<unordered_set>`, `<set>` in production includes +- All tests pass (current 349+ plus new primitive tests) +- Binary `.text` decreases +- No performance regression (self-hosting build time within 10%) +- All string data flows through `StringPool` +- No general-purpose hash table outside of `StringPool` +- Directory-name lookup uses per-directory `SortedPairVec`, not hash table diff --git a/include/pup/cli/config_commands.hpp b/include/pup/cli/config_commands.hpp index 9cdc2b2..68fe1db 100644 --- a/include/pup/cli/config_commands.hpp +++ b/include/pup/cli/config_commands.hpp @@ -3,11 +3,10 @@ #pragma once +#include "pup/core/node_id_map.hpp" #include "pup/core/types.hpp" #include "pup/graph/dag.hpp" -#include -#include #include #include @@ -27,13 +26,13 @@ struct ConfigCommand { /// to resolve the full filesystem path for existence checks. 
auto find_config_commands( graph::BuildGraph const& graph, - std::filesystem::path const& source_root + std::string const& source_root ) -> std::vector; /// Collect all commands that the given commands depend on (transitively). auto collect_command_dependencies( graph::BuildGraph const& graph, - std::set const& commands -) -> std::set; + NodeIdMap32 const& commands +) -> NodeIdMap32; } // namespace pup::cli diff --git a/include/pup/cli/context.hpp b/include/pup/cli/context.hpp index 73fbf21..1643210 100644 --- a/include/pup/cli/context.hpp +++ b/include/pup/cli/context.hpp @@ -8,11 +8,9 @@ #include "pup/parser/ast.hpp" #include -#include #include #include #include -#include #include #include @@ -89,7 +87,7 @@ class BuildContext { [[nodiscard]] auto vars() const -> parser::VarDb const&; [[nodiscard]] - auto parsed_dirs() const -> std::set const&; + auto parsed_dirs() const -> std::vector const&; /// Get the old index loaded from disk (if any) /// Returns nullptr if no index exists or failed to load @@ -128,8 +126,8 @@ auto make_layout_options(Options const& opts) -> LayoutOptions; /// Context for clean commands struct CleanContext { - std::filesystem::path root; - std::filesystem::path build_dir; + std::string root; + std::string build_dir; bool is_in_tree; }; diff --git a/include/pup/cli/output.hpp b/include/pup/cli/output.hpp index 51200ed..c0f690c 100644 --- a/include/pup/cli/output.hpp +++ b/include/pup/cli/output.hpp @@ -4,10 +4,9 @@ #pragma once #include -#include -#include #include #include +#include namespace pup::cli { @@ -21,15 +20,15 @@ struct OutputMode { struct RemoveResult { std::size_t removed_count = 0; std::size_t error_count = 0; - std::set output_dirs = {}; + std::vector output_dirs = {}; }; /// Remove empty directories from deepest to shallowest /// Returns count of directories removed auto remove_empty_directories( - std::set const& dirs, - std::filesystem::path const& build_dir, - std::filesystem::path const& source_dir, + std::vector const& 
dirs, + std::string const& build_dir, + std::string const& source_dir, OutputMode mode ) -> std::size_t; diff --git a/include/pup/cli/target.hpp b/include/pup/cli/target.hpp index 7c8b9c4..284b926 100644 --- a/include/pup/cli/target.hpp +++ b/include/pup/cli/target.hpp @@ -5,62 +5,33 @@ #include "pup/core/result.hpp" -#include #include #include #include namespace pup { -/// Parsed target specifying variant, scope, and whether it's an output file struct Target { - std::optional variant; - std::filesystem::path scope_or_output; + std::optional variant; + std::string scope_or_output; bool is_output = false; }; -/// Parse a single target path into its variant and scope/output components -/// -/// Detection logic: -/// 1. If first component contains tup.config, it's a variant -/// 2. Remainder is classified as directory (scope) or file (output) -/// 3. Source files (*.c, *.cpp, etc.) return error -/// -/// @param project_root The project root directory -/// @param target_path The target path to parse (relative to project_root) -/// @return Parsed Target or error [[nodiscard]] auto parse_target( - std::filesystem::path const& project_root, + std::string const& project_root, std::string const& target_path ) -> Result; -/// Expand a glob pattern into multiple targets -/// -/// Patterns like "build-*" expand to all matching variants. -/// Patterns like "build-*/src/lib" expand to variant + scope pairs. 
-/// -/// @param project_root The project root directory -/// @param pattern Glob pattern (e.g., "build-*", "build-*/src/lib") -/// @return Vector of expanded targets [[nodiscard]] auto expand_glob_target( - std::filesystem::path const& project_root, + std::string const& project_root, std::string const& pattern ) -> std::vector; -/// Validate that multiple targets are consistent -/// -/// All targets must be the same type: -/// - All have explicit variant, OR -/// - All have no variant (applies to all variants) -/// -/// @param project_root The project root directory -/// @param targets Target paths to validate -/// @return Vector of parsed targets, or error [[nodiscard]] auto validate_target_consistency( - std::filesystem::path const& project_root, + std::string const& project_root, std::vector const& targets ) -> Result>; diff --git a/include/pup/core/arena.hpp b/include/pup/core/arena.hpp new file mode 100644 index 0000000..420e954 --- /dev/null +++ b/include/pup/core/arena.hpp @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include +#include + +namespace pup { + +struct ArenaSlice { + std::uint32_t offset = 0; + std::uint32_t length = 0; +}; + +class Arena32 { +public: + Arena32() = default; + ~Arena32(); + + Arena32(Arena32 const&) = delete; + auto operator=(Arena32 const&) -> Arena32& = delete; + + Arena32(Arena32&&) noexcept; + auto operator=(Arena32&&) noexcept -> Arena32&; + + auto append(std::uint32_t const* values, std::uint32_t count) -> ArenaSlice; + + [[nodiscard]] + auto get(ArenaSlice slice) const -> std::uint32_t const*; + + struct Span { + std::uint32_t const* data; + std::uint32_t length; + auto begin() const -> std::uint32_t const* { return data; } + auto end() const -> std::uint32_t const* { return data + length; } + auto operator[](std::uint32_t i) const -> std::uint32_t { return data[i]; } + [[nodiscard]] + auto size() const -> std::uint32_t + { + return length; + } + [[nodiscard]] + 
auto empty() const -> bool + { + return length == 0; + } + }; + + [[nodiscard]] + auto slice(ArenaSlice s) const -> Span; + + [[nodiscard]] + auto at(std::uint32_t offset) -> std::uint32_t&; + + [[nodiscard]] + auto size() const -> std::size_t; + + auto append_extend(ArenaSlice old, std::uint32_t new_value) -> ArenaSlice; + + auto reserve(std::size_t total_elements) -> void; + auto compact() -> void; + auto clear() -> void; + +private: + std::uint32_t* data_ = nullptr; + std::size_t size_ = 0; + std::size_t capacity_ = 0; + + auto grow(std::size_t needed) -> void; +}; + +} // namespace pup diff --git a/include/pup/core/hash.hpp b/include/pup/core/hash.hpp index ecb46fd..3f1f77e 100644 --- a/include/pup/core/hash.hpp +++ b/include/pup/core/hash.hpp @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include #include namespace pup { @@ -50,7 +50,7 @@ auto sha256(std::string_view data) -> Hash256; /// Compute SHA-256 hash of a file [[nodiscard]] -auto sha256_file(std::filesystem::path const& path) -> Result; +auto sha256_file(std::string const& path) -> Result; /// Convert hash to hex string (lowercase) [[nodiscard]] diff --git a/include/pup/core/id_array.hpp b/include/pup/core/id_array.hpp new file mode 100644 index 0000000..21a5ad8 --- /dev/null +++ b/include/pup/core/id_array.hpp @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include "pup/core/id_bitset.hpp" + +#include +#include + +namespace pup { + +class IdArray32 { +public: + IdArray32() = default; + ~IdArray32(); + + IdArray32(IdArray32 const&) = delete; + auto operator=(IdArray32 const&) -> IdArray32& = delete; + + IdArray32(IdArray32&&) noexcept; + auto operator=(IdArray32&&) noexcept -> IdArray32&; + + auto resize(std::uint32_t max_id) -> void; + auto set(std::uint32_t id, std::uint32_t value) -> void; + + [[nodiscard]] + auto get(std::uint32_t id) const -> std::uint32_t; + + [[nodiscard]] + auto contains(std::uint32_t id) const -> 
bool; + + auto remove(std::uint32_t id) -> void; + auto clear() -> void; + + [[nodiscard]] + auto count() const -> std::size_t + { + return present_.count(); + } + + auto for_each(void (*fn)(std::uint32_t id, std::uint32_t value, void* ctx), void* ctx) const -> void; + +private: + std::uint32_t* data_ = nullptr; + std::size_t capacity_ = 0; + IdBitSet present_; +}; + +} // namespace pup diff --git a/include/pup/core/id_bitset.hpp b/include/pup/core/id_bitset.hpp new file mode 100644 index 0000000..de443a1 --- /dev/null +++ b/include/pup/core/id_bitset.hpp @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include +#include + +namespace pup { + +class IdBitSet { +public: + IdBitSet() = default; + ~IdBitSet(); + + IdBitSet(IdBitSet const&) = delete; + auto operator=(IdBitSet const&) -> IdBitSet& = delete; + + IdBitSet(IdBitSet&&) noexcept; + auto operator=(IdBitSet&&) noexcept -> IdBitSet&; + + auto resize(std::uint32_t max_id) -> void; + auto insert(std::uint32_t id) -> void; + auto remove(std::uint32_t id) -> void; + + [[nodiscard]] + auto contains(std::uint32_t id) const -> bool; + + auto clear() -> void; + + [[nodiscard]] + auto count() const -> std::size_t; + + auto for_each(void (*fn)(std::uint32_t id, void* ctx), void* ctx) const -> void; + +private: + std::uint64_t* words_ = nullptr; + std::size_t word_count_ = 0; +}; + +} // namespace pup diff --git a/include/pup/core/layout.hpp b/include/pup/core/layout.hpp index 3db84e8..f4e6d6f 100644 --- a/include/pup/core/layout.hpp +++ b/include/pup/core/layout.hpp @@ -3,108 +3,80 @@ #pragma once +#include "pup/core/path.hpp" #include "pup/core/result.hpp" -#include #include +#include #include namespace pup { -/// Encapsulates source/config/output directory layout for a build. -/// Enables read-only source directories by separating source, config, and output trees. 
-/// -/// Three-tree model: -/// source_root - Where source files live (may be read-only, upstream code) -/// config_root - Where Tupfiles live (may mirror source structure) -/// output_root - Where outputs/.pup/tup.config go (writable) -/// -/// Traditional builds: config_root == source_root (Tupfiles alongside sources) -/// Out-of-tree config: config_root != source_root (separate config tree) struct ProjectLayout { - std::filesystem::path source_root; ///< Where source files live (may be read-only) - std::filesystem::path config_root; ///< Where Tupfiles live (may be separate from source) - std::filesystem::path output_root; ///< Where outputs/.pup/tup.config go (writable) + std::string source_root; + std::string config_root; + std::string output_root; - /// True if source and output are the same (in-tree build) [[nodiscard]] auto is_in_tree() const -> bool { return source_root == output_root; } - /// True if config tree is separate from source tree [[nodiscard]] auto has_separate_config() const -> bool { return config_root != source_root; } - /// Get path to .pup directory [[nodiscard]] - auto pup_dir() const -> std::filesystem::path + auto pup_dir() const -> std::string { - return output_root / ".pup"; + return path::join(output_root, ".pup"); } - /// Get path to index file [[nodiscard]] - auto index_path() const -> std::filesystem::path + auto index_path() const -> std::string { - return pup_dir() / "index"; + return path::join(pup_dir(), "index"); } - /// Resolve a source-relative path to absolute [[nodiscard]] - auto resolve_source( - std::filesystem::path const& rel - ) const -> std::filesystem::path + auto resolve_source(std::string const& rel) const -> std::string { - return source_root / rel; + return path::join(source_root, rel); } - /// Resolve a config-relative path to absolute [[nodiscard]] - auto resolve_config( - std::filesystem::path const& rel - ) const -> std::filesystem::path + auto resolve_config(std::string const& rel) const -> std::string { 
- return config_root / rel; + return path::join(config_root, rel); } - /// Resolve an output-relative path to absolute [[nodiscard]] - auto resolve_output( - std::filesystem::path const& rel - ) const -> std::filesystem::path + auto resolve_output(std::string const& rel) const -> std::string { - return output_root / rel; + return path::join(output_root, rel); } }; -/// Options for layout discovery struct LayoutOptions { - std::optional source_dir; ///< -S argument - std::optional config_dir; ///< -C argument - std::optional build_dir; ///< -B argument + std::optional source_dir; + std::optional config_dir; + std::optional build_dir; }; -/// Discover project layout from options and environment. -/// Priority: CLI args > environment variables > cwd detection [[nodiscard]] auto discover_layout(LayoutOptions const& opts = {}) -> Result; -/// Find project root by walking up from start_dir looking for Tupfile.ini [[nodiscard]] auto find_project_root( - std::filesystem::path const& start_dir -) -> std::optional; + std::string const& start_dir +) -> std::optional; -/// Discover variant directories (subdirs containing tup.config or .pup/) -/// Returns sorted list of variant paths relative to source_root [[nodiscard]] auto discover_variants( - std::filesystem::path const& source_root -) -> std::vector; + std::string const& source_root +) -> std::vector; } // namespace pup diff --git a/include/pup/core/node_id_map.hpp b/include/pup/core/node_id_map.hpp new file mode 100644 index 0000000..4838bc8 --- /dev/null +++ b/include/pup/core/node_id_map.hpp @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include "pup/core/arena.hpp" +#include "pup/core/id_array.hpp" +#include "pup/core/types.hpp" + +namespace pup { + +class NodeIdMap32 { +public: + auto resize_files(std::uint32_t max_idx) -> void { files_.resize(max_idx); } + auto resize_commands(std::uint32_t max_idx) -> void { cmds_.resize(max_idx); } + auto 
resize_conditions(std::uint32_t max_idx) -> void { conds_.resize(max_idx); } + auto resize_phis(std::uint32_t max_idx) -> void { phis_.resize(max_idx); } + + auto set(NodeId id, std::uint32_t value) -> void + { + auto idx = static_cast(node_id::index(id)); + select(id).set(idx, value); + } + + [[nodiscard]] + auto get(NodeId id) const -> std::uint32_t + { + auto idx = static_cast(node_id::index(id)); + return select_const(id).get(idx); + } + + [[nodiscard]] + auto contains(NodeId id) const -> bool + { + auto idx = static_cast(node_id::index(id)); + return select_const(id).contains(idx); + } + + auto remove(NodeId id) -> void + { + auto idx = static_cast(node_id::index(id)); + select(id).remove(idx); + } + + auto clear() -> void + { + files_.clear(); + cmds_.clear(); + conds_.clear(); + phis_.clear(); + } + + [[nodiscard]] + auto size() const -> std::size_t + { + return files_.count() + cmds_.count() + conds_.count() + phis_.count(); + } + + [[nodiscard]] + auto empty() const -> bool + { + return size() == 0; + } + +private: + IdArray32 files_, cmds_, conds_, phis_; + + auto select(NodeId id) -> IdArray32& + { + if (node_id::is_command(id)) { + return cmds_; + } + if (node_id::is_condition(id)) { + return conds_; + } + if (node_id::is_phi(id)) { + return phis_; + } + return files_; + } + + auto select_const(NodeId id) const -> IdArray32 const& + { + if (node_id::is_command(id)) { + return cmds_; + } + if (node_id::is_condition(id)) { + return conds_; + } + if (node_id::is_phi(id)) { + return phis_; + } + return files_; + } +}; + +/// Maps NodeId → ArenaSlice via two parallel NodeIdMap32 (offset + length). +/// Absent nodes return {0, 0}. Invariant: set_slice is never called with +/// length == 0, so length == 0 reliably means "not present". 
+class NodeIdArenaIndex { +public: + [[nodiscard]] + auto get_slice(NodeId id) const -> ArenaSlice + { + if (!offsets_.contains(id)) { + return { 0, 0 }; + } + return { offsets_.get(id), lengths_.get(id) }; + } + + auto set_slice(NodeId id, ArenaSlice s) -> void + { + offsets_.set(id, s.offset); + lengths_.set(id, s.length); + } + + [[nodiscard]] + auto contains(NodeId id) const -> bool + { + return offsets_.contains(id); + } + + auto resize_files(std::uint32_t n) -> void + { + offsets_.resize_files(n); + lengths_.resize_files(n); + } + auto resize_commands(std::uint32_t n) -> void + { + offsets_.resize_commands(n); + lengths_.resize_commands(n); + } + auto resize_conditions(std::uint32_t n) -> void + { + offsets_.resize_conditions(n); + lengths_.resize_conditions(n); + } + auto resize_phis(std::uint32_t n) -> void + { + offsets_.resize_phis(n); + lengths_.resize_phis(n); + } + + auto clear() -> void + { + offsets_.clear(); + lengths_.clear(); + } + +private: + NodeIdMap32 offsets_; + NodeIdMap32 lengths_; +}; + +} // namespace pup diff --git a/include/pup/core/path.hpp b/include/pup/core/path.hpp new file mode 100644 index 0000000..6485039 --- /dev/null +++ b/include/pup/core/path.hpp @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include +#include +#include + +namespace pup { + +/// A filesystem path is a UTF-8 encoded string using '/' as separator. +/// Windows native conversion happens at the platform boundary (file_io-win32.cpp). +using Path = std::string; + +/// Path string operations. All functions operate on forward-slash-separated +/// UTF-8 paths without touching the filesystem. +namespace path { + +/// Join two path segments with '/'. 
+/// join("src", "foo.c") → "src/foo.c" +/// join("src/", "foo.c") → "src/foo.c" +/// join("", "foo.c") → "foo.c" +/// join("src", "/usr/include") → "/usr/include" (absolute rhs replaces) +[[nodiscard]] +auto join(std::string_view a, std::string_view b) -> std::string; + +/// Get the parent directory. +/// parent("src/lib/foo.c") → "src/lib" +/// parent("foo.c") → "" +/// parent("") → "" +/// parent("/") → "/" +[[nodiscard]] +auto parent(std::string_view p) -> std::string_view; + +/// Get the filename component (after last '/'). +/// filename("src/foo.c") → "foo.c" +/// filename("foo.c") → "foo.c" +/// filename("src/") → "" +[[nodiscard]] +auto filename(std::string_view p) -> std::string_view; + +/// Get the stem (filename without extension). +/// stem("src/foo.tar.gz") → "foo.tar" +/// stem("Makefile") → "Makefile" +[[nodiscard]] +auto stem(std::string_view p) -> std::string_view; + +/// Get the file extension (including dot). +/// extension("foo.c") → ".c" +/// extension("foo.tar.gz") → ".gz" +/// extension("Makefile") → "" +[[nodiscard]] +auto extension(std::string_view p) -> std::string_view; + +/// Check if a path is absolute. +/// POSIX: starts with '/' +/// Windows: starts with drive letter (C:/) or UNC (//) +[[nodiscard]] +auto is_absolute(std::string_view p) -> bool; + +/// Lexically normalize a path by resolving '.' and '..' segments. +/// normalize("src/../include/./foo.h") → "include/foo.h" +/// normalize("/a/b/../c") → "/a/c" +/// Does not touch the filesystem. +[[nodiscard]] +auto normalize(std::string_view p) -> std::string; + +/// Compute relative path from base to target (lexical, no filesystem access). +/// relative("a/b/c", "a") → "b/c" +/// relative("a/b", "a/b") → "." 
+/// relative("x/y", "a/b") → "../../x/y" +[[nodiscard]] +auto relative(std::string_view target, std::string_view base) -> std::string; + +} // namespace path +} // namespace pup diff --git a/include/pup/core/path_utils.hpp b/include/pup/core/path_utils.hpp index 0bf5a2f..d32d91b 100644 --- a/include/pup/core/path_utils.hpp +++ b/include/pup/core/path_utils.hpp @@ -3,7 +3,6 @@ #pragma once -#include #include #include #include @@ -11,70 +10,46 @@ namespace pup { -/// Check if path is under root directory (handles trailing separators). -/// Returns true if path == root or path is a descendant of root. [[nodiscard]] auto is_path_under( - std::filesystem::path const& path, - std::filesystem::path const& root + std::string const& path, + std::string const& root ) -> bool; -/// Get relative path from root, or empty string if not under root. [[nodiscard]] auto relative_to_root( - std::filesystem::path const& path, - std::filesystem::path const& root + std::string const& path, + std::string const& root ) -> std::string; -/// Check if string path is under scope prefix (directory-boundary aware). -/// Empty scope matches all paths. [[nodiscard]] auto is_path_in_scope( std::string_view path, std::string_view scope ) -> bool; -/// Check if path is under any of the given scopes. -/// Empty scopes vector matches all paths. [[nodiscard]] auto is_path_in_any_scope( std::string_view path, std::vector const& scopes ) -> bool; -/// Compute the "../" prefix needed to navigate from source_dir back to project root. -/// E.g., "src/lib" → "../../", "" → "" [[nodiscard]] auto compute_source_to_root(std::string_view source_dir) -> std::string; -/// Strip a prefix from a path if present. -/// E.g., strip_path_prefix("build/src/foo.o", "build") → "src/foo.o" [[nodiscard]] auto strip_path_prefix( std::string_view path, std::string_view prefix ) -> std::string; -/// Check if path (when resolved from source_root) is under target_root. -/// If so, return the relative path within target_root. 
-/// E.g., resolve_under_root("../build/include/foo.h", "/src", "/src/../build") -/// → "include/foo.h" [[nodiscard]] auto resolve_under_root( std::string_view path, - std::filesystem::path const& source_root, - std::filesystem::path const& target_root + std::string const& source_root, + std::string const& target_root ) -> std::optional; -/// Transform a project-root-relative path to be relative to a Tupfile's source directory. -/// Used for command expansion where commands run from the Tupfile directory. -/// -/// Examples (source_dir = "src/lib", source_to_root = "../../"): -/// "src/lib/foo.c" → "foo.c" (local file, strip prefix) -/// "src/lib" → "." (exact match) -/// "include/bar.h" → "../../include/bar.h" (other dir, prepend source_to_root) -/// "../data.txt" → "../../../data.txt" (already relative, prepend source_to_root) -/// "/usr/include" → "/usr/include" (absolute, unchanged) [[nodiscard]] auto make_source_relative( std::string_view path, diff --git a/include/pup/core/sorted_id_vec.hpp b/include/pup/core/sorted_id_vec.hpp new file mode 100644 index 0000000..a37cd8f --- /dev/null +++ b/include/pup/core/sorted_id_vec.hpp @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include +#include + +namespace pup { + +class SortedIdVec { +public: + SortedIdVec() = default; + ~SortedIdVec(); + + SortedIdVec(SortedIdVec const&) = delete; + auto operator=(SortedIdVec const&) -> SortedIdVec& = delete; + + SortedIdVec(SortedIdVec&&) noexcept; + auto operator=(SortedIdVec&&) noexcept -> SortedIdVec&; + + auto insert(std::uint32_t id) -> bool; + + [[nodiscard]] + auto contains(std::uint32_t id) const -> bool; + + auto remove(std::uint32_t id) -> bool; + auto clear() -> void; + + [[nodiscard]] + auto empty() const -> bool + { + return size_ == 0; + } + + [[nodiscard]] + auto size() const -> std::size_t; + + auto merge_from(SortedIdVec const& other) -> void; + + [[nodiscard]] + auto data() const -> std::uint32_t 
const*; + + [[nodiscard]] + auto begin() const -> std::uint32_t const* + { + return data_; + } + + [[nodiscard]] + auto end() const -> std::uint32_t const* + { + return data_ + size_; + } + + auto for_each(void (*fn)(std::uint32_t id, void* ctx), void* ctx) const -> void; + +private: + std::uint32_t* data_ = nullptr; + std::size_t size_ = 0; + std::size_t capacity_ = 0; + + auto grow() -> void; + [[nodiscard]] + auto lower_bound(std::uint32_t id) const -> std::size_t; +}; + +class SortedPairVec { +public: + SortedPairVec() = default; + ~SortedPairVec(); + + SortedPairVec(SortedPairVec const&) = delete; + auto operator=(SortedPairVec const&) -> SortedPairVec& = delete; + + SortedPairVec(SortedPairVec&&) noexcept; + auto operator=(SortedPairVec&&) noexcept -> SortedPairVec&; + + auto insert(std::uint32_t key, std::uint32_t value) -> bool; + + [[nodiscard]] + auto find(std::uint32_t key) const -> std::uint32_t const*; + + [[nodiscard]] + auto contains(std::uint32_t key) const -> bool; + + auto remove(std::uint32_t key) -> bool; + auto clear() -> void; + + [[nodiscard]] + auto empty() const -> bool + { + return size_ == 0; + } + + [[nodiscard]] + auto size() const -> std::size_t; + + auto for_each(void (*fn)(std::uint32_t key, std::uint32_t value, void* ctx), void* ctx) const -> void; + +private: + struct Pair { + std::uint32_t key; + std::uint32_t value; + }; + + Pair* data_ = nullptr; + std::size_t size_ = 0; + std::size_t capacity_ = 0; + + auto grow() -> void; + [[nodiscard]] + auto lower_bound(std::uint32_t key) const -> std::size_t; +}; + +} // namespace pup diff --git a/include/pup/core/string_hash.hpp b/include/pup/core/string_hash.hpp new file mode 100644 index 0000000..399324a --- /dev/null +++ b/include/pup/core/string_hash.hpp @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#pragma once + +#include +#include +#include + +namespace pup { + +/// Transparent hash for heterogeneous lookup in unordered containers. 
+/// Enables find(string_view) on unordered_map. +struct StringHash { + using is_transparent = void; + + auto operator()(std::string_view sv) const noexcept -> std::size_t + { + return std::hash {}(sv); + } + + auto operator()(std::string const& s) const noexcept -> std::size_t + { + return std::hash {}(s); + } +}; + +} // namespace pup diff --git a/include/pup/core/string_pool.hpp b/include/pup/core/string_pool.hpp index a7e17bf..f15e77e 100644 --- a/include/pup/core/string_pool.hpp +++ b/include/pup/core/string_pool.hpp @@ -5,10 +5,10 @@ #include "pup/core/string_id.hpp" +#include #include #include #include -#include namespace pup { @@ -53,8 +53,21 @@ class StringPool { auto reserve(std::size_t count) -> void; private: + struct Meta { + std::uint32_t hash = 0; + std::uint16_t displacement = 0; + }; + + auto key_at(std::size_t slot) const -> std::string_view; + auto probe_find(std::uint32_t h, std::string_view key) const -> StringId; + auto probe_insert(std::uint32_t h, StringId id) -> void; + auto grow() -> void; + std::deque storage_; - std::unordered_map index_; + Meta* meta_ = nullptr; + StringId* values_ = nullptr; + std::size_t index_capacity_ = 0; + std::size_t index_count_ = 0; }; } // namespace pup diff --git a/include/pup/exec/runner.hpp b/include/pup/exec/runner.hpp index c720712..46c5e5f 100644 --- a/include/pup/exec/runner.hpp +++ b/include/pup/exec/runner.hpp @@ -7,7 +7,6 @@ #include "pup/core/types.hpp" #include -#include #include #include #include @@ -28,7 +27,7 @@ struct CommandResult { /// Options for running a command struct RunOptions { - std::filesystem::path working_dir = {}; + std::string working_dir = {}; std::vector env = {}; ///< Additional environment variables bool inherit_env = true; ///< Inherit parent environment std::optional timeout = {}; ///< Command timeout @@ -66,7 +65,7 @@ class CommandRunner { ) -> Result; /// Set the default working directory - auto set_working_dir(std::filesystem::path dir) -> void + auto 
set_working_dir(std::string dir) -> void { default_options_.working_dir = std::move(dir); } diff --git a/include/pup/exec/scheduler.hpp b/include/pup/exec/scheduler.hpp index bfad768..92b5656 100644 --- a/include/pup/exec/scheduler.hpp +++ b/include/pup/exec/scheduler.hpp @@ -3,19 +3,17 @@ #pragma once +#include "pup/core/node_id_map.hpp" #include "pup/core/result.hpp" #include "pup/core/types.hpp" #include "runner.hpp" #include -#include #include #include #include -#include #include #include -#include #include namespace pup::graph { @@ -29,11 +27,11 @@ struct BuildJob { NodeId id = 0; std::string command = {}; std::string display = {}; - std::filesystem::path working_dir = {}; + std::string working_dir = {}; std::vector inputs = {}; std::vector outputs = {}; std::vector order_only_inputs = {}; ///< Order-only dependencies - std::set exported_vars = {}; ///< Env vars to export to command + std::vector exported_vars = {}; ///< Env vars to export to command // For auto-generated rules (from pattern matching) bool capture_stdout = false; ///< Capture stdout for depfile parsing @@ -62,13 +60,13 @@ using ProgressCallback = std::function timeout = {}; ///< Per-command timeout }; @@ -109,7 +107,7 @@ class Scheduler { [[nodiscard]] auto build_subset( graph::BuildGraph const& graph, - std::set const& command_ids + NodeIdMap32 const& command_ids ) -> Result; /// Build specific targets and all required dependencies. 
@@ -151,20 +149,6 @@ class Scheduler { graph::BuildGraph const& graph ) -> Result; - /// Execute jobs sequentially with dependency ordering - auto execute_sequential( - std::vector const& jobs, - graph::BuildGraph const& graph, - std::unordered_map const& env_cache - ) -> Result; - - /// Execute a single job - auto execute_job( - BuildJob const& job, - CommandRunner& runner, - std::unordered_map const& env_cache - ) -> JobResult; - /// Build job list from graph in topological order [[nodiscard]] auto build_job_list( @@ -175,7 +159,7 @@ class Scheduler { [[nodiscard]] auto filter_jobs( std::vector const& all_jobs, - std::set const& affected_nodes + NodeIdMap32 const& affected_nodes ) -> std::vector; }; diff --git a/include/pup/graph/builder.hpp b/include/pup/graph/builder.hpp index f8d899b..c256505 100644 --- a/include/pup/graph/builder.hpp +++ b/include/pup/graph/builder.hpp @@ -5,16 +5,12 @@ #include "dag.hpp" #include "pup/core/result.hpp" +#include "pup/core/sorted_id_vec.hpp" +#include "pup/core/string_id.hpp" #include "pup/parser/ast.hpp" #include "pup/parser/eval.hpp" -#include -#include -#include -#include #include -#include -#include #include namespace pup::parser { @@ -31,16 +27,16 @@ class RulePatternRegistry; /// Options for graph building struct BuilderOptions { - std::filesystem::path source_root; ///< Source tree root (where source files live) - std::filesystem::path config_root; ///< Config tree root (where Tupfiles live) - std::filesystem::path output_root; ///< Output tree root (where outputs/.pup go) - std::filesystem::path config_path; ///< Path to tup.config (for sticky edge tracking) - bool expand_globs = true; ///< Expand glob patterns - bool validate_inputs = true; ///< Check that input files exist - bool verbose = false; ///< Print verbose output - DepScannerRegistry const* scanner_registry = nullptr; ///< Optional scanner registry for implicit deps - RulePatternRegistry const* pattern_registry = nullptr; ///< Optional pattern registry for 
auto-generated rules - std::unordered_map cached_env_vars = {}; ///< Cached env vars from previous build + std::string source_root; ///< Source tree root (where source files live) + std::string config_root; ///< Config tree root (where Tupfiles live) + std::string output_root; ///< Output tree root (where outputs/.pup go) + std::string config_path; ///< Path to tup.config (for sticky edge tracking) + bool expand_globs = true; ///< Expand glob patterns + bool validate_inputs = true; ///< Check that input files exist + bool verbose = false; ///< Print verbose output + DepScannerRegistry const* scanner_registry = nullptr; ///< Optional scanner registry for implicit deps + RulePatternRegistry const* pattern_registry = nullptr; ///< Optional pattern registry for auto-generated rules + std::vector> cached_env_vars = {}; ///< Cached env vars from previous build (sorted by key) }; /// Bang macro definition @@ -61,8 +57,39 @@ struct BangMacroDef { struct PendingWeakAssignment { std::string name; std::string value; - std::set config_deps; ///< Config vars used in RHS - std::set env_deps; ///< Env vars used in RHS + SortedIdVec config_deps; ///< Config var StringIds used in RHS + SortedIdVec env_deps; ///< Env var StringIds used in RHS +}; + +// ============================================================================ +// GroupMemberTable - maps interned group name to its list of member NodeIds +// ============================================================================ + +struct GroupMemberTable { + SortedPairVec name_to_idx; ///< Interned group name → index into pool + std::vector> pool; ///< pool[idx] = member NodeIds + + [[nodiscard]] + auto find(std::uint32_t key) const -> std::vector const* + { + auto const* idx = name_to_idx.find(key); + if (!idx) { + return nullptr; + } + return &pool[*idx]; + } + + auto get_or_create(std::uint32_t key) -> std::vector& + { + auto const* idx = name_to_idx.find(key); + if (idx) { + return pool[*idx]; + } + auto new_idx = 
static_cast(pool.size()); + pool.emplace_back(); + name_to_idx.insert(key, new_idx); + return pool.back(); + } }; /// Context for building the graph (per-Tupfile state) @@ -72,20 +99,20 @@ struct BuilderContext { parser::VarDb* vars = nullptr; ///< Variable database for import BuilderOptions options = {}; - std::unordered_map macros = {}; - std::unordered_map> groups = {}; - std::unordered_set included_files = {}; - std::set exported_vars = {}; ///< Environment variables to export to commands + std::vector> macros = {}; ///< Sorted by interned name key + GroupMemberTable groups = {}; + SortedIdVec included_files = {}; + SortedIdVec exported_vars = {}; ///< Interned environment variable names to export - std::filesystem::path current_dir = {}; - std::filesystem::path current_file = {}; + std::string current_dir = {}; + std::string current_file = {}; std::vector sticky_sources = {}; ///< Tupfile + included files for sticky edges - /// Config variables used during current command expansion (cleared per command) - std::set used_config_vars = {}; + /// Config variable StringIds used during current command expansion (cleared per command) + SortedIdVec used_config_vars = {}; - /// Env variables used during current command expansion (cleared per command) - std::set used_env_vars = {}; + /// Env variable StringIds used during current command expansion (cleared per command) + SortedIdVec used_env_vars = {}; std::vector errors = {}; std::vector warnings = {}; @@ -98,38 +125,59 @@ struct BuilderContext { /// will have these guards applied. std::vector condition_stack = {}; - /// Config variables used in enclosing conditions (for phi-node model). + /// Config variable StringIds used in enclosing conditions (for phi-node model). /// Commands inside conditionals need to depend on these vars to rebuild when /// the condition's value changes. 
- std::set condition_config_vars = {}; + SortedIdVec condition_config_vars = {}; }; // ============================================================================ -// BuilderState - Persistent state across multiple Tupfiles +// VarDepTracker - maps variable StringId to its set of dependency StringIds // ============================================================================ -/// Key for cross-directory group lookup -struct GroupKey { - std::string directory; - std::string name; +struct VarDepTracker { + SortedPairVec name_to_idx; ///< StringId → index into pool + std::vector pool; ///< pool[idx] = dep set of StringIds - auto operator==(GroupKey const& other) const -> bool = default; - auto operator<(GroupKey const& other) const -> bool + [[nodiscard]] + auto find(StringId key) const -> SortedIdVec const* { - return std::tie(directory, name) < std::tie(other.directory, other.name); + auto const* idx = name_to_idx.find(to_underlying(key)); + if (!idx) { + return nullptr; + } + return &pool[*idx]; + } + + auto get_or_create(StringId key) -> SortedIdVec& + { + auto const* idx = name_to_idx.find(to_underlying(key)); + if (idx) { + return pool[*idx]; + } + auto new_idx = static_cast(pool.size()); + pool.emplace_back(); + name_to_idx.insert(to_underlying(key), new_idx); + return pool.back(); + } + + [[nodiscard]] + auto empty() const -> bool + { + return name_to_idx.empty(); } -}; -/// Hash function for GroupKey -struct GroupKeyHash { - auto operator()(GroupKey const& k) const -> std::size_t + auto clear() -> void { - auto h1 = std::hash {}(k.directory); - auto h2 = std::hash {}(k.name); - return h1 ^ (h2 << 1); + name_to_idx.clear(); + pool.clear(); } }; +// ============================================================================ +// BuilderState - Persistent state across multiple Tupfiles +// ============================================================================ + /// Deferred order-only edge reference for circular parsing situations struct 
DeferredOrderOnlyEdge { NodeId group_id; @@ -139,6 +187,8 @@ struct DeferredOrderOnlyEdge { { return std::tie(group_id, command_id) < std::tie(other.group_id, other.command_id); } + + auto operator==(DeferredOrderOnlyEdge const& other) const -> bool = default; }; /// Per-session state that persists across multiple Tupfiles @@ -147,32 +197,29 @@ struct BuilderState { std::vector errors; std::vector warnings; - /// Group node lookup: (directory, name) → NodeId - std::unordered_map group_nodes; + /// Group node lookup: interned "directory/name" StringId → NodeId + SortedPairVec group_nodes; /// Deferred edges to resolve after all Tupfiles are parsed - std::set deferred_edges; + std::vector deferred_edges; - /// Config variable nodes (name -> NodeId) for fine-grained dependency tracking - std::unordered_map config_var_nodes; + /// Config variable nodes (interned name StringId → NodeId) + SortedPairVec config_var_nodes; /// Virtual $ directory for imported environment variables (like tup's env_dt) NodeId env_var_dir_id = INVALID_NODE_ID; - /// Imported environment variable nodes (var_name -> NodeId) - std::unordered_map imported_env_var_nodes; + /// Imported environment variable nodes (interned name StringId → NodeId) + SortedPairVec imported_env_var_nodes; - /// Set of imported variable names (for tracking which vars are imported) - std::unordered_set imported_var_names; + /// Set of imported variable name StringIds + SortedIdVec imported_var_names; /// Track which regular variables depend on config vars (for transitive tracking) - /// When CXXFLAGS = @(RELEASE_CXXFLAGS), record: var_config_deps["CXXFLAGS"] = {"RELEASE_CXXFLAGS"} - std::unordered_map, parser::StringHash, std::equal_to<>> - var_config_deps; + VarDepTracker var_config_deps; /// Track which regular variables depend on imported env vars (for transitive tracking) - std::unordered_map, parser::StringHash, std::equal_to<>> - var_env_deps; + VarDepTracker var_env_deps; }; // 
============================================================================ diff --git a/include/pup/graph/dag.hpp b/include/pup/graph/dag.hpp index 64c754b..ff0852d 100644 --- a/include/pup/graph/dag.hpp +++ b/include/pup/graph/dag.hpp @@ -3,27 +3,29 @@ #pragma once +#include "pup/core/arena.hpp" +#include "pup/core/node_id_map.hpp" #include "pup/core/result.hpp" +#include "pup/core/sorted_id_vec.hpp" #include "pup/core/string_id.hpp" #include "pup/core/string_pool.hpp" #include "pup/core/types.hpp" #include "pup/graph/rule_pattern.hpp" #include -#include -#include #include -#include #include #include #include -#include #include namespace pup::graph { -/// Path cache - maps NodeId to full path string for efficiency -using PathCache = std::unordered_map; +/// Path cache - maps NodeId to interned full path string +struct PathCache { + NodeIdMap32 ids; ///< NodeId → StringId (path interned in pool) + StringPool pool; ///< Owns the full path strings +}; /// Edge between nodes in the build graph struct Edge { @@ -66,7 +68,7 @@ struct CommandNode { std::vector inputs = {}; ///< Operand file NodeIds for %f expansion std::vector outputs = {}; ///< Operand file NodeIds for %o expansion - std::set exported_vars = {}; ///< Env vars to export to command (interned) + SortedIdVec exported_vars = {}; ///< Env vars to export to command (interned StringIds) // For generated rules (auto-generated from pattern matching) std::optional generated_output = {}; ///< Output specification @@ -99,86 +101,6 @@ struct PhiNode { NodeId else_output = INVALID_NODE_ID; ///< Output when condition is false }; -/// Key for directory + name lookup (interned) -struct DirNameKey { - NodeId parent_dir = 0; - StringId name = StringId::Empty; - - auto operator==(DirNameKey const& other) const -> bool = default; -}; - -/// View for zero-allocation DirNameKey lookup -struct DirNameKeyView { - NodeId parent_dir = 0; - std::string_view name = {}; -}; - -/// Hash function for DirNameKey with transparent 
lookup support -struct DirNameKeyHash { - using is_transparent = void; - - StringPool const* pool = nullptr; - - auto operator()(DirNameKey const& key) const noexcept -> std::size_t - { - auto h1 = std::hash {}(key.parent_dir); - // Hash the actual string content to match DirNameKeyView hashing - auto name_sv = pool ? pool->get(key.name) : std::string_view {}; - auto h2 = std::hash {}(name_sv); - return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)); - } - - auto operator()(DirNameKeyView const& view) const noexcept -> std::size_t - { - auto h1 = std::hash {}(view.parent_dir); - auto h2 = std::hash {}(view.name); - return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)); - } -}; - -/// Equality for DirNameKey with transparent lookup support -struct DirNameKeyEqual { - using is_transparent = void; - - StringPool const* pool = nullptr; - - auto operator()(DirNameKey const& a, DirNameKey const& b) const -> bool - { - return a == b; - } - - auto operator()(DirNameKey const& a, DirNameKeyView const& b) const -> bool - { - if (a.parent_dir != b.parent_dir) { - return false; - } - if (!pool) { - return false; - } - return pool->get(a.name) == b.name; - } - - auto operator()(DirNameKeyView const& a, DirNameKey const& b) const -> bool - { - return (*this)(b, a); - } -}; - -/// Transparent hash for string heterogeneous lookup -struct StringHash { - using is_transparent = void; - - auto operator()(std::string_view sv) const noexcept -> std::size_t - { - return std::hash {}(sv); - } - - auto operator()(std::string const& s) const noexcept -> std::size_t - { - return std::hash {}(s); - } -}; - /// Build graph - DAG of nodes and edges (plain data struct) struct Graph { StringPool strings; ///< Interned string storage @@ -189,17 +111,16 @@ struct Graph { std::deque phi_nodes; ///< Phi nodes (merge conditional outputs) std::vector edges; ///< Central edge storage (single source of truth) - // Edge indices: map NodeId -> indices into edges vector - std::unordered_map> edges_to_index; 
///< Edges pointing TO this node - std::unordered_map> edges_from_index; ///< Edges pointing FROM this node - - // Order-only edges stored separately (not in edges vector) - std::unordered_map> order_only_to_index; ///< Order-only deps OF this node - std::unordered_map> order_only_dependents; ///< Nodes depending on this + Arena32 edge_arena; + NodeIdArenaIndex edges_to_index; + NodeIdArenaIndex edges_from_index; + NodeIdArenaIndex order_only_to_index; + NodeIdArenaIndex order_only_dependents; - // Node lookup indices (with transparent lookup support) - std::unordered_map dir_name_index; - std::unordered_map> command_str_index; + // Node lookup indices + std::vector dir_children; ///< Per-directory name→NodeId index (indexed by parent dir) + StringPool command_strings; ///< Interned expanded command strings + SortedPairVec command_index; ///< StringId(command) → NodeId bool command_index_built = false; NodeId next_file_id = 2; ///< Next file node ID (starts at 2, BUILD_ROOT is 1) @@ -397,8 +318,8 @@ auto expand_instruction( Graph const& graph, NodeId cmd_id, PathCache& cache, - std::filesystem::path const& source_root, - std::filesystem::path const& config_root = {} + std::string const& source_root, + std::string const& config_root = {} ) -> std::string; /// Expand instruction pattern (convenience overload, creates temporary cache) @@ -687,7 +608,7 @@ class BuildGraph { auto set_build_root_name(std::string name) -> void { graph::set_build_root_name(graph_, std::move(name)); - path_cache_.clear(); + graph::clear_path_cache(path_cache_); } [[nodiscard]] diff --git a/include/pup/graph/rule_pattern.hpp b/include/pup/graph/rule_pattern.hpp index dd47390..32a74a1 100644 --- a/include/pup/graph/rule_pattern.hpp +++ b/include/pup/graph/rule_pattern.hpp @@ -7,8 +7,8 @@ #include #include -#include #include +#include #include namespace pup::graph { @@ -55,7 +55,7 @@ struct GeneratedRule { /// Pattern that generates additional rules when matched struct RulePattern { - 
std::regex command_pattern; + bool (*matches)(std::string_view command); /// Generate a rule from a matched command /// Returns nullopt if pattern matches but rule shouldn't be generated diff --git a/include/pup/graph/scanners/gcc.hpp b/include/pup/graph/scanners/gcc.hpp index 9bfa163..a5839c4 100644 --- a/include/pup/graph/scanners/gcc.hpp +++ b/include/pup/graph/scanners/gcc.hpp @@ -34,6 +34,11 @@ class GccScanner final : public DepScanner { [[nodiscard]] auto make_gcc_scanner() -> std::unique_ptr; +/// Check if a command string is a GCC/Clang compile command (compiler + -c flag). +/// Used as a lightweight predicate for RulePattern matching without std::regex. +[[nodiscard]] +auto matches_gcc_compile(std::string_view command) -> bool; + // TODO: Add MsvcScanner for cl.exe using /showIncludes (stdout) or /sourceDependencies (file) } // namespace pup::graph::scanners diff --git a/include/pup/index/entry.hpp b/include/pup/index/entry.hpp index 21ab8ea..00ea8ef 100644 --- a/include/pup/index/entry.hpp +++ b/include/pup/index/entry.hpp @@ -4,12 +4,15 @@ #pragma once #include "format.hpp" +#include "pup/core/arena.hpp" +#include "pup/core/node_id_map.hpp" +#include "pup/core/sorted_id_vec.hpp" +#include "pup/core/string_pool.hpp" #include "pup/core/types.hpp" #include #include #include -#include #include namespace pup::index { @@ -153,8 +156,7 @@ class Index { [[nodiscard]] auto find_command_by_id(NodeId id) const -> CommandEntry const*; - /// Find command by command string (requires prior build_command_lookup() call) - /// @deprecated Use build_command_lookup() for explicit lookup map management + /// Find command by expanded command string (requires prior build_edge_indices() call) [[nodiscard]] auto find_command_by_command(std::string const& cmd) const -> CommandEntry const*; @@ -217,21 +219,21 @@ class Index { std::vector commands_ = {}; std::vector edges_ = {}; - // Edge indices for O(1) lookup (indices into edges_ vector) - std::unordered_map> edges_from_index_ 
= {}; - std::unordered_map> edges_to_index_ = {}; + // Edge indices (indices into edges_ vector) + Arena32 edge_arena_; + NodeIdArenaIndex edges_from_index_; + NodeIdArenaIndex edges_to_index_; - // Command index for O(1) lookup by command string (index into commands_ vector) - std::unordered_map command_index_ = {}; + // Command index (command string → index into commands_ vector) + StringPool command_strings_; + SortedPairVec command_index_; // Index save time (nanoseconds since epoch) for racy-clean detection std::int64_t save_time_ns_ = 0; [[nodiscard]] - auto lookup_edges( - std::unordered_map> const& index, - NodeId id - ) const -> std::vector; + auto lookup_edges(NodeIdArenaIndex const& index, NodeId id) const + -> std::vector; }; /// Reconstruct full command string from instruction + operands. @@ -242,12 +244,4 @@ class Index { [[nodiscard]] auto get_command_string(Index const& index, CommandEntry const& cmd) -> std::string; -/// Build a lookup map from command strings to command entries. -/// This is O(N) reconstruction - call once when needed for change detection. 
-/// @param index The index to build lookup from -/// @return Map of command string -> command entry pointer -[[nodiscard]] -auto build_command_lookup(Index const& index) - -> std::unordered_map; - } // namespace pup::index diff --git a/include/pup/index/reader.hpp b/include/pup/index/reader.hpp index a884df6..abbf74e 100644 --- a/include/pup/index/reader.hpp +++ b/include/pup/index/reader.hpp @@ -9,8 +9,8 @@ #include "pup/platform/file_io.hpp" #include -#include #include +#include #include namespace pup::index { @@ -23,11 +23,11 @@ struct IndexFile { /// Open an index file for reading [[nodiscard]] -auto open_index(std::filesystem::path const& path) -> Result; +auto open_index(std::string const& path) -> Result; /// Check if a file is a valid index file (checks magic and version) [[nodiscard]] -auto is_valid_index(std::filesystem::path const& path) -> bool; +auto is_valid_index(std::string const& path) -> bool; /// Read the entire index into memory [[nodiscard]] @@ -35,7 +35,7 @@ auto read_index(IndexFile const& f) -> Result; /// Convenience: open and read index in one call [[nodiscard]] -auto read_index(std::filesystem::path const& path) -> Result; +auto read_index(std::string const& path) -> Result; /// Get the header [[nodiscard]] diff --git a/include/pup/index/writer.hpp b/include/pup/index/writer.hpp index 767dc12..7a240e7 100644 --- a/include/pup/index/writer.hpp +++ b/include/pup/index/writer.hpp @@ -7,7 +7,7 @@ #include "format.hpp" #include "pup/core/result.hpp" -#include +#include #include namespace pup::index { @@ -16,7 +16,7 @@ namespace pup::index { /// Uses a temporary file and rename for atomic operation [[nodiscard]] auto write_index( - std::filesystem::path const& path, + std::string const& path, Index const& index ) -> Result; diff --git a/include/pup/parser/config.hpp b/include/pup/parser/config.hpp index 396e610..332e6d5 100644 --- a/include/pup/parser/config.hpp +++ b/include/pup/parser/config.hpp @@ -6,7 +6,7 @@ #include "eval.hpp" #include 
"pup/core/result.hpp" -#include +#include #include namespace pup::parser { @@ -21,7 +21,7 @@ namespace pup::parser { /// The CONFIG_ prefix is stripped when storing, so CONFIG_DEBUG=y /// becomes accessible as @(DEBUG) in Tupfiles. [[nodiscard]] -auto parse_config(std::filesystem::path const& path) -> Result; +auto parse_config(std::string const& path) -> Result; /// Parse config from string content (for testing) [[nodiscard]] diff --git a/include/pup/parser/depfile.hpp b/include/pup/parser/depfile.hpp index 2efe8cc..59ec73c 100644 --- a/include/pup/parser/depfile.hpp +++ b/include/pup/parser/depfile.hpp @@ -5,7 +5,6 @@ #include "pup/core/result.hpp" -#include #include #include #include @@ -20,7 +19,7 @@ struct Depfile { /// Parse a dependency file from a filesystem path [[nodiscard]] -auto parse_depfile(std::filesystem::path const& path) -> Result; +auto parse_depfile(std::string const& path) -> Result; /// Parse a dependency file from string content [[nodiscard]] diff --git a/include/pup/parser/eval.hpp b/include/pup/parser/eval.hpp index 0cc93b8..e7f0eb6 100644 --- a/include/pup/parser/eval.hpp +++ b/include/pup/parser/eval.hpp @@ -5,59 +5,46 @@ #include "ast.hpp" #include "pup/core/result.hpp" +#include "pup/core/sorted_id_vec.hpp" +#include "pup/core/string_pool.hpp" -#include #include -#include #include #include -#include -#include #include -namespace pup::parser { +namespace pup::graph { +struct VarDepTracker; +} -/// Transparent hash for heterogeneous lookup in VarDb -struct StringHash { - using is_transparent = void; - auto operator()(std::string_view sv) const noexcept -> std::size_t - { - return std::hash {}(sv); - } - auto operator()(std::string const& s) const noexcept -> std::size_t - { - return std::hash {}(s); - } -}; +namespace pup::parser { /// Variable database for storing and retrieving variable values class VarDb { public: VarDb() = default; - /// Set a variable value (replaces existing) auto set(std::string_view name, std::string value) -> 
void; - - /// Append to a variable value (space-separated) auto append(std::string_view name, std::string_view value) -> void; - /// Get a variable value (returns empty if not found) [[nodiscard]] auto get(std::string_view name) const -> std::string_view; - - /// Check if variable exists [[nodiscard]] auto contains(std::string_view name) const -> bool; - - /// Get all variable names [[nodiscard]] auto names() const -> std::vector; - /// Clear all variables auto clear() -> void; private: - std::unordered_map> vars_; + struct Entry { + std::string name; + std::string value; + }; + std::vector entries_; + + auto find_entry(std::string_view name) -> Entry*; + auto find_entry(std::string_view name) const -> Entry const*; }; /// Identifies which variable bank a lookup resolved from @@ -85,8 +72,11 @@ struct VarContext { std::string_view tup_srcdir = {}; ///< TUP_SRCDIR value std::string_view tup_outdir = {}; ///< TUP_OUTDIR value - /// Set of imported environment variable names - std::unordered_set const* imported_vars = nullptr; + /// Set of imported variable name StringIds + SortedIdVec const* imported_vars = nullptr; + + /// String pool for looking up StringIds (read-only during lookup) + StringPool const* string_pool = nullptr; }; /// Result of variable lookup with bank information @@ -145,35 +135,34 @@ struct EvalContext { /// Callback for requesting a directory's Tupfile to be parsed (for cross-directory deps) /// Called when a path references another directory that may have a Tupfile. /// Returns success if directory was parsed, error if circular/missing. 
- std::function(std::filesystem::path const&)> request_directory = {}; + std::function(std::string const&)> request_directory = {}; /// Set of directories that have Tupfiles (relative to root) /// Used to determine when to invoke request_directory callback - std::set const* available_tupfile_dirs = nullptr; + std::vector const* available_tupfile_dirs = nullptr; /// Callback for tracking config variable usage (for fine-grained dependency tracking) /// Called with the stripped variable name (e.g., "OPT" not "CONFIG_OPT") when /// a config variable is accessed via @(VAR) or $(CONFIG_VAR). std::function on_config_var_used = {}; - /// Set of imported environment variable names (for tracking which vars are imported) - std::unordered_set const* imported_vars = nullptr; + /// Set of imported variable name StringIds + SortedIdVec const* imported_vars = nullptr; /// Callback for tracking imported env variable usage (for fine-grained dependency tracking) /// Called with the variable name when an imported env var is accessed via $(VAR). std::function on_env_var_used = {}; - /// Map of regular variable names to their transitive config var dependencies. + /// String pool for resolving StringIds during variable lookup + StringPool const* string_pool = nullptr; + + /// Tracker for transitive config var dependencies of regular variables. /// When a variable is assigned a value containing @(CONFIG_VAR), record the dependency. /// When that variable is later expanded, propagate the dependency via on_config_var_used. - std::unordered_map, StringHash, std::equal_to<>> const* - var_config_deps - = nullptr; - - /// Map of regular variable names to their transitive env var dependencies. - std::unordered_map, StringHash, std::equal_to<>> const* - var_env_deps - = nullptr; + graph::VarDepTracker const* var_config_deps = nullptr; + + /// Tracker for transitive env var dependencies of regular variables. 
+ graph::VarDepTracker const* var_env_deps = nullptr; /// Callback for tracking variable assignments (for show var command) /// Called when a variable is assigned with the variable name, operator, before/after values, diff --git a/include/pup/parser/glob.hpp b/include/pup/parser/glob.hpp index 4884fe5..f1e6b4d 100644 --- a/include/pup/parser/glob.hpp +++ b/include/pup/parser/glob.hpp @@ -5,7 +5,6 @@ #include "pup/core/result.hpp" -#include #include #include #include @@ -76,7 +75,7 @@ class Glob { [[nodiscard]] auto glob_expand( std::string_view pattern, - std::filesystem::path const& base_dir, + std::string const& base_dir, GlobOptions const& options = {} ) -> Result>; @@ -84,7 +83,7 @@ auto glob_expand( [[nodiscard]] auto glob_expand_all( std::vector const& patterns, - std::filesystem::path const& base_dir, + std::string const& base_dir, GlobOptions const& options = {} ) -> Result; diff --git a/include/pup/parser/ignore.hpp b/include/pup/parser/ignore.hpp index d28e5a1..c9a298d 100644 --- a/include/pup/parser/ignore.hpp +++ b/include/pup/parser/ignore.hpp @@ -5,7 +5,6 @@ #include "pup/core/result.hpp" -#include #include #include #include @@ -27,7 +26,7 @@ class IgnoreList { IgnoreList() = default; /// Load patterns from a .pupignore file - static auto load(std::filesystem::path const& path) -> Result; + static auto load(std::string const& path) -> Result; /// Create an IgnoreList with default patterns (.git/, .pup/, node_modules/) static auto with_defaults() -> IgnoreList; @@ -37,7 +36,7 @@ class IgnoreList { /// Check if a relative path should be ignored [[nodiscard]] - auto is_ignored(std::filesystem::path const& rel_path) const -> bool; + auto is_ignored(std::string const& rel_path) const -> bool; /// Check if the list has no patterns [[nodiscard]] @@ -61,7 +60,7 @@ class IgnoreList { /// Check if a path matches a pattern [[nodiscard]] - auto match_pattern(IgnorePattern const& p, std::filesystem::path const& path) const -> bool; + auto 
match_pattern(IgnorePattern const& p, std::string const& path) const -> bool; /// Match a glob pattern against a string [[nodiscard]] diff --git a/include/pup/platform/file_io.hpp b/include/pup/platform/file_io.hpp index a9c04e3..dfd146a 100644 --- a/include/pup/platform/file_io.hpp +++ b/include/pup/platform/file_io.hpp @@ -6,23 +6,23 @@ #include "pup/core/result.hpp" #include -#include +#include #include #include +#include +#include +#include namespace pup::platform { -/// File metadata from stat struct FileStat { std::uint64_t size = 0; - std::int64_t mtime_ns = 0; ///< Modification time (nanoseconds since epoch) + std::int64_t mtime_ns = 0; }; -/// Get file metadata [[nodiscard]] -auto stat_file(std::filesystem::path const& path) -> Result; +auto stat_file(std::string const& path) -> Result; -/// Memory-mapped read-only file (platform-specific data via pimpl) class MappedFile { public: MappedFile(); @@ -34,23 +34,18 @@ class MappedFile { MappedFile(MappedFile&& other) noexcept; auto operator=(MappedFile&& other) noexcept -> MappedFile&; - /// Open a file for memory-mapped reading [[nodiscard]] - static auto open(std::filesystem::path const& path) -> Result; + static auto open(std::string const& path) -> Result; - /// Get pointer to mapped data [[nodiscard]] auto data() const -> std::byte const*; - /// Get size of mapped data [[nodiscard]] auto size() const -> std::size_t; - /// Check if file is open [[nodiscard]] auto is_open() const -> bool; - /// Close the mapped file auto close() -> void; private: @@ -58,11 +53,62 @@ class MappedFile { std::unique_ptr impl_; }; -/// Write data atomically to a file (write to temp, then rename) [[nodiscard]] auto atomic_write( - std::filesystem::path const& path, + std::string const& path, std::span data ) -> Result; +// Filesystem queries +[[nodiscard]] +auto exists(std::string const& path) -> bool; +[[nodiscard]] +auto is_file(std::string const& path) -> bool; +[[nodiscard]] +auto is_directory(std::string const& path) -> 
bool; +[[nodiscard]] +auto is_symlink(std::string const& path) -> bool; +[[nodiscard]] +auto is_empty(std::string const& path) -> bool; + +// Filesystem mutations +[[nodiscard]] +auto create_directories(std::string const& path) -> Result; +[[nodiscard]] +auto remove_file(std::string const& path) -> Result; +[[nodiscard]] +auto remove_all(std::string const& path) -> Result; +[[nodiscard]] +auto rename_path(std::string const& from, std::string const& to) -> Result; +[[nodiscard]] +auto copy_file(std::string const& from, std::string const& to) -> Result; + +// Path resolution +[[nodiscard]] +auto current_directory() -> Result; +[[nodiscard]] +auto canonical(std::string const& path) -> Result; +[[nodiscard]] +auto absolute(std::string const& path) -> Result; +[[nodiscard]] +auto read_symlink(std::string const& path) -> Result; + +// File I/O +[[nodiscard]] +auto read_file(std::string const& path) -> Result; +[[nodiscard]] +auto write_file(std::string const& path, std::string_view data) -> Result; + +// Directory traversal +struct DirEntry { + std::string name; + bool is_dir = false; +}; +[[nodiscard]] +auto read_directory(std::string const& path) -> Result>; + +using WalkVisitor = std::function; +[[nodiscard]] +auto walk_directory(std::string const& path, WalkVisitor const& visitor) -> Result; + } // namespace pup::platform diff --git a/include/pup/platform/path.hpp b/include/pup/platform/path.hpp index cac8d85..227b31c 100644 --- a/include/pup/platform/path.hpp +++ b/include/pup/platform/path.hpp @@ -3,14 +3,11 @@ #pragma once -#include #include namespace pup::platform { -/// Convert a filesystem path to a UTF-8 encoded string. -/// On POSIX, paths are already UTF-8. On Windows, this converts from UTF-16. 
[[nodiscard]] -auto to_utf8(std::filesystem::path const& path) -> std::string; +auto to_utf8(std::string const& path) -> std::string; } // namespace pup::platform diff --git a/include/pup/platform/process.hpp b/include/pup/platform/process.hpp index 03c8464..25a4a27 100644 --- a/include/pup/platform/process.hpp +++ b/include/pup/platform/process.hpp @@ -6,7 +6,6 @@ #include "pup/core/result.hpp" #include -#include #include #include #include @@ -27,7 +26,7 @@ struct ProcessResult { /// Options for process execution struct ProcessOptions { std::string command; - std::filesystem::path working_dir; + std::string working_dir; std::vector env; bool inherit_env = true; bool capture_stdout = true; diff --git a/src/cli/cmd_build.cpp b/src/cli/cmd_build.cpp index 4c18148..9f3b25e 100644 --- a/src/cli/cmd_build.cpp +++ b/src/cli/cmd_build.cpp @@ -8,6 +8,8 @@ #include "pup/core/hash.hpp" #include "pup/core/layout.hpp" #include "pup/core/metrics.hpp" +#include "pup/core/node_id_map.hpp" +#include "pup/core/path.hpp" #include "pup/core/path_utils.hpp" #include "pup/core/terminal.hpp" #include "pup/core/types.hpp" @@ -138,13 +140,14 @@ auto is_tupfile(std::string_view path) -> bool auto walk_upstream_from_scope( pup::graph::BuildGraph const& graph, std::vector const& scopes -) -> std::set +) -> std::vector { if (scopes.empty()) { return {}; } - auto visited = std::set {}; + auto visited = pup::NodeIdMap32 {}; + auto result = std::vector {}; auto stack = std::vector {}; // Seed with commands whose source_dir is in scope @@ -162,7 +165,8 @@ auto walk_upstream_from_scope( continue; } - visited.insert(id); + visited.set(id, 1); + result.push_back(id); for (auto input_id : graph.get_inputs(id)) { stack.push_back(input_id); @@ -176,9 +180,11 @@ auto walk_upstream_from_scope( auto id = stack.back(); stack.pop_back(); - if (!visited.insert(id).second) { + if (visited.contains(id)) { continue; } + visited.set(id, 1); + result.push_back(id); for (auto input_id : graph.get_inputs(id)) { 
stack.push_back(input_id); @@ -188,7 +194,7 @@ auto walk_upstream_from_scope( } } - return visited; + return result; } /// Collect all upstream input file paths for commands in the given scopes. @@ -217,19 +223,19 @@ auto collect_upstream_files( auto collect_scope_with_upstream_commands( pup::graph::BuildGraph const& graph, std::vector const& scopes -) -> std::set +) -> pup::NodeIdMap32 { - auto commands = std::set {}; + auto commands = pup::NodeIdMap32 {}; for (auto id : walk_upstream_from_scope(graph, scopes)) { if (pup::node_id::is_command(id) && graph.get_command_node(id)) { - commands.insert(id); + commands.set(id, 1); } } return commands; } auto find_changed_files_with_implicit( - std::filesystem::path const& source_root, + std::string const& source_root, pup::index::Index const& old_index, std::vector const& scopes, std::set const& upstream_files, @@ -259,8 +265,8 @@ auto find_changed_files_with_implicit( // File resolution: // All paths are now source-relative (generated files include build root, e.g., "build/program"). - auto file_path = std::filesystem::path { file.path }; - auto path = file_path.is_absolute() ? file_path : source_root / file_path; + auto file_path = std::string { file.path }; + auto path = pup::path::is_absolute(file_path) ? file_path : pup::path::join(source_root, file_path); ++metrics.stat_calls; auto stat_result = pup::platform::stat_file(path); @@ -326,18 +332,18 @@ struct ImplicitDepContext { std::unordered_map& path_to_id; pup::NodeId& next_id; std::set>& added_edges; - std::filesystem::path const& source_root; + std::string const& source_root; }; /// Recursively get or create directory entries in the index. /// Returns the NodeId for the directory at dir_path. 
auto get_or_create_dir( ImplicitDepContext& ctx, - std::filesystem::path const& dir_path + std::string const& dir_path ) -> pup::NodeId { - auto normalized = dir_path.lexically_normal(); - auto path_str = normalized.generic_string(); + auto normalized = pup::path::normalize(dir_path); + auto path_str = normalized; if (path_str.empty() || path_str == ".") { return pup::NodeId { 0 }; @@ -366,9 +372,9 @@ auto get_or_create_dir( return dir_id; } - auto parent_path = normalized.parent_path(); + auto parent_path = std::string { pup::path::parent(normalized) }; auto parent_id = get_or_create_dir(ctx, parent_path); - auto basename = normalized.filename().string(); + auto basename = std::string { pup::path::filename(normalized) }; auto dir_id = ctx.next_id++; auto entry = pup::index::FileEntry { @@ -392,14 +398,14 @@ auto get_or_create_dir( /// Creates parent directories as needed and returns the file's NodeId. auto create_implicit_file( ImplicitDepContext& ctx, - std::filesystem::path const& abs_path, + std::string const& abs_path, std::string const& rel_path ) -> pup::NodeId { auto content_hash = pup::Hash256 {}; auto file_size = std::uint64_t { 0 }; auto mtime_ns = std::int64_t { 0 }; - if (std::filesystem::exists(abs_path)) { + if (pup::platform::exists(abs_path)) { auto hash_result = pup::sha256_file(abs_path); if (hash_result) { content_hash = *hash_result; @@ -414,9 +420,9 @@ auto create_implicit_file( } } - auto fs_path = std::filesystem::path { rel_path }; - auto parent_id = get_or_create_dir(ctx, fs_path.parent_path()); - auto basename = fs_path.filename().string(); + auto fs_path = std::string { rel_path }; + auto parent_id = get_or_create_dir(ctx, std::string { pup::path::parent(fs_path) }); + auto basename = std::string { pup::path::filename(fs_path) }; auto file_id = ctx.next_id++; @@ -441,8 +447,8 @@ auto create_implicit_file( /// Returns the populated index and a path-to-id mapping for later use. 
auto serialize_graph_nodes( pup::graph::BuildGraph const& graph, - std::filesystem::path const& source_root, - std::filesystem::path const& output_root + std::string const& source_root, + std::string const& output_root ) -> std::pair> { auto index = pup::index::Index {}; @@ -471,14 +477,14 @@ auto serialize_graph_nodes( } auto file_path = (node->type == pup::NodeType::Generated) - ? output_root / fs_path - : source_root / node_path; + ? pup::path::join(output_root, fs_path) + : pup::path::join(source_root, node_path); auto content_hash = pup::Hash256 {}; auto file_size = std::uint64_t { 0 }; auto mtime_ns = std::int64_t { 0 }; - if (std::filesystem::exists(file_path)) { + if (pup::platform::exists(file_path)) { auto hash_result = pup::sha256_file(file_path); if (hash_result) { content_hash = *hash_result; @@ -633,14 +639,14 @@ auto process_implicit_deps( { for (auto const& [cmd_id, deps] : discovered_deps) { for (auto const& dep_path : deps) { - auto dep_fs_path = std::filesystem::path { dep_path }; - auto abs_path = dep_fs_path.is_absolute() ? dep_fs_path : ctx.source_root / dep_fs_path; + auto dep_fs_path = std::string { dep_path }; + auto abs_path = pup::path::is_absolute(dep_fs_path) ? 
dep_fs_path : pup::path::join(ctx.source_root, dep_fs_path); auto rel_path = std::string {}; if (pup::is_path_under(abs_path, ctx.source_root)) { - rel_path = std::filesystem::relative(abs_path, ctx.source_root).generic_string(); + rel_path = pup::path::relative(abs_path, ctx.source_root); } else { - rel_path = abs_path.generic_string(); + rel_path = abs_path; } auto it = ctx.path_to_id.find(rel_path); @@ -668,9 +674,9 @@ auto preserve_old_implicit_edges( ImplicitDepContext& ctx ) -> void { - auto commands_with_new_deps = std::set {}; + auto commands_with_new_deps = pup::NodeIdMap32 {}; for (auto const& [cmd_id, _] : discovered_deps) { - commands_with_new_deps.insert(cmd_id); + commands_with_new_deps.set(cmd_id, 1); } for (auto const& edge : old_index.edges()) { @@ -688,8 +694,8 @@ auto preserve_old_implicit_edges( } auto new_file_it = ctx.path_to_id.find(old_file->path); - auto old_path = std::filesystem::path { old_file->path }; - auto abs_path = old_path.is_absolute() ? old_path : ctx.source_root / old_path; + auto old_path = std::string { old_file->path }; + auto abs_path = pup::path::is_absolute(old_path) ? old_path : pup::path::join(ctx.source_root, old_path); auto new_from_id = new_file_it != ctx.path_to_id.end() ? new_file_it->second : create_implicit_file(ctx, abs_path, old_file->path); @@ -774,8 +780,8 @@ auto expand_implicit_deps( auto build_index( pup::graph::BuildGraph const& graph, std::unordered_map> const& discovered_deps, - std::filesystem::path const& source_root, - std::filesystem::path const& output_root, + std::string const& source_root, + std::string const& output_root, pup::index::Index const* old_index = nullptr ) -> pup::index::Index { @@ -807,7 +813,7 @@ auto build_index( preserve_old_implicit_edges(*old_index, discovered_deps, ctx); } - return index; + return std::move(index); } /// Validate output targets exist in the build graph. 
@@ -879,7 +885,7 @@ auto detect_new_commands( auto remove_stale_outputs( pup::graph::BuildGraph const& graph, pup::index::Index const& idx, - std::filesystem::path const& source_root, + std::string const& source_root, std::string_view variant_name, bool dry_run, bool verbose @@ -899,13 +905,12 @@ auto remove_stale_outputs( } // Paths now include build root (e.g., "build/program") - auto abs_path = source_root / file->path; - if (std::filesystem::exists(abs_path)) { + auto abs_path = pup::path::join(source_root, file->path); + if (pup::platform::exists(abs_path)) { if (dry_run) { vprint(variant_name, "Would remove stale: %s\n", file->path.c_str()); } else { - auto ec = std::error_code {}; - if (std::filesystem::remove(abs_path, ec)) { + if (pup::platform::remove_file(abs_path)) { if (verbose) { vprint(variant_name, " Removed stale: %s\n", file->path.c_str()); } @@ -1170,31 +1175,29 @@ auto build_single_variant( auto& deps = discovered_deps[target_id]; for (auto const& dep_path : job_result.discovered_deps) { - auto ec = std::error_code {}; - auto resolved = std::filesystem::path {}; - if (std::filesystem::path { dep_path }.is_absolute()) { - resolved = std::filesystem::weakly_canonical(dep_path, ec); - } else { - resolved = std::filesystem::weakly_canonical(job.working_dir / dep_path, ec); - } - if (ec) { + auto to_resolve = pup::path::is_absolute(dep_path) + ? 
dep_path + : pup::path::join(job.working_dir, dep_path); + auto resolved_result = pup::platform::canonical(to_resolve); + if (!resolved_result) { if (opts.verbose) { - fprintf(stderr, "Warning: Skipping dependency '%s': %s\n", dep_path.c_str(), ec.message().c_str()); + fprintf(stderr, "Warning: Skipping dependency '%s': %s\n", dep_path.c_str(), resolved_result.error().message.c_str()); } continue; } + auto resolved = *resolved_result; if (pup::is_path_under(resolved, ctx.layout().source_root)) { - auto rel = std::filesystem::relative(resolved, ctx.layout().source_root, ec); - if (ec) { + auto rel = pup::path::relative(resolved, ctx.layout().source_root); + if (rel.starts_with("..")) { if (opts.verbose) { - fprintf(stderr, "Warning: Cannot relativize '%s': %s\n", resolved.string().c_str(), ec.message().c_str()); + fprintf(stderr, "Warning: Cannot relativize '%s'\n", resolved.c_str()); } continue; } - deps.push_back(rel.generic_string()); + deps.push_back(rel); } else { - deps.push_back(resolved.generic_string()); + deps.push_back(resolved); } } } @@ -1219,9 +1222,10 @@ auto build_single_variant( // Identify config-generating commands to exclude from regular build // (config rules should only run during 'pup configure') - auto config_cmd_ids = std::set {}; - for (auto const& cfg : find_config_commands(ctx.graph(), ctx.layout().source_root)) { - config_cmd_ids.insert(cfg.cmd_id); + auto config_cmds = find_config_commands(ctx.graph(), ctx.layout().source_root); + auto config_cmd_ids = NodeIdMap32 {}; + for (auto const& cfg : config_cmds) { + config_cmd_ids.set(cfg.cmd_id, 1); } auto start = std::chrono::steady_clock::time_point { std::chrono::steady_clock::now() }; @@ -1231,7 +1235,7 @@ auto build_single_variant( auto mode = determine_build_mode( !target_node_ids.empty(), use_incremental, - !config_cmd_ids.empty(), + !config_cmds.empty(), scope_with_upstream ); @@ -1241,8 +1245,8 @@ auto build_single_variant( break; case BuildMode::ScopeWithUpstream: { auto scope_cmds 
= collect_scope_with_upstream_commands(ctx.graph(), scopes); - for (auto cfg_id : config_cmd_ids) { - scope_cmds.erase(cfg_id); + for (auto const& cfg : config_cmds) { + scope_cmds.remove(cfg.cmd_id); } build_result = scheduler.build_subset(ctx.graph(), scope_cmds); break; @@ -1251,10 +1255,10 @@ auto build_single_variant( build_result = scheduler.build_targets(ctx.graph(), target_node_ids); break; case BuildMode::Subset: { - auto non_config_cmds = std::set {}; + auto non_config_cmds = pup::NodeIdMap32 {}; for (auto id : ctx.graph().all_nodes()) { if (node_id::is_command(id) && !config_cmd_ids.contains(id)) { - non_config_cmds.insert(id); + non_config_cmds.set(id, 1); } } build_result = scheduler.build_subset(ctx.graph(), non_config_cmds); diff --git a/src/cli/cmd_clean.cpp b/src/cli/cmd_clean.cpp index 5354a0d..404a698 100644 --- a/src/cli/cmd_clean.cpp +++ b/src/cli/cmd_clean.cpp @@ -5,12 +5,13 @@ #include "pup/cli/context.hpp" #include "pup/cli/multi_variant.hpp" #include "pup/cli/output.hpp" +#include "pup/core/path.hpp" #include "pup/core/types.hpp" #include "pup/index/reader.hpp" +#include "pup/platform/file_io.hpp" #include #include -#include namespace pup::cli { @@ -39,8 +40,8 @@ auto veprint(std::string_view variant_name, char const* fmt, Args&&... 
args) -> } auto remove_indexed_outputs( - std::filesystem::path const& index_path, - std::filesystem::path const& root, + std::string const& index_path, + std::string const& root, OutputMode mode, std::string_view variant_name ) -> RemoveResult @@ -60,15 +61,15 @@ auto remove_indexed_outputs( continue; } - auto rel_path = std::filesystem::path { file.path }; - auto abs_path = root / rel_path; - for (auto parent = abs_path.parent_path(); - !parent.empty() && parent != parent.parent_path(); - parent = parent.parent_path()) { - result.output_dirs.insert(parent); + auto rel_path = std::string { file.path }; + auto abs_path = pup::path::join(root, rel_path); + for (auto parent = std::string { pup::path::parent(abs_path) }; + !parent.empty() && parent != std::string { pup::path::parent(parent) }; + parent = std::string { pup::path::parent(parent) }) { + result.output_dirs.push_back(parent); } - if (!std::filesystem::exists(abs_path)) { + if (!pup::platform::exists(abs_path)) { continue; } @@ -78,14 +79,14 @@ auto remove_indexed_outputs( continue; } - auto ec = std::error_code {}; - if (std::filesystem::remove(abs_path, ec)) { + auto r = pup::platform::remove_file(abs_path); + if (r) { ++result.removed_count; if (mode.verbose) { vprint(variant_name, "Removed: %s\n", file.path.c_str()); } - } else if (ec) { - veprint(variant_name, "Error removing %s: %s\n", file.path.c_str(), ec.message().c_str()); + } else { + veprint(variant_name, "Error removing %s: %s\n", file.path.c_str(), r.error().message.c_str()); ++result.error_count; } } @@ -101,15 +102,15 @@ auto clean_single_variant(Options const& opts, std::string_view variant_name) -> return EXIT_FAILURE; } - auto index_path = ctx->build_dir / ".pup" / "index"; - if (!std::filesystem::exists(index_path)) { + auto index_path = pup::path::join(pup::path::join(ctx->build_dir, ".pup"), "index"); + if (!pup::platform::exists(index_path)) { vprint(variant_name, "Nothing to clean (no index found)\n"); return EXIT_SUCCESS; } auto 
mode = OutputMode { .dry_run = opts.dry_run, .verbose = opts.verbose }; // Paths are source-relative and include build root prefix for variant builds - // (e.g., "build/hello.o"). Use root so root / path gives correct absolute path. + // (e.g., "build/hello.o"). Use root so pup::path::join(root, path) gives correct absolute path. auto result = remove_indexed_outputs(index_path, ctx->root, mode, variant_name); auto dirs_removed = remove_empty_directories( @@ -133,44 +134,44 @@ auto distclean_single_variant(Options const& opts, std::string_view variant_name return EXIT_FAILURE; } - auto index_path = ctx->build_dir / ".pup" / "index"; + auto index_path = pup::path::join(pup::path::join(ctx->build_dir, ".pup"), "index"); auto error_count = std::size_t { 0 }; - auto output_dirs = std::set {}; + auto output_dirs = std::vector {}; auto mode = OutputMode { .dry_run = opts.dry_run, .verbose = opts.verbose }; - if (std::filesystem::exists(index_path)) { + if (pup::platform::exists(index_path)) { // Paths are source-relative and include build root prefix for variant builds. 
auto result = remove_indexed_outputs(index_path, ctx->root, mode, variant_name); error_count += result.error_count; output_dirs = std::move(result.output_dirs); } - auto pup_dir = ctx->build_dir / ".pup"; - if (std::filesystem::exists(pup_dir)) { + auto pup_dir = pup::path::join(ctx->build_dir, ".pup"); + if (pup::platform::exists(pup_dir)) { if (opts.dry_run) { - vprint(variant_name, "Would remove: %s\n", pup_dir.string().c_str()); + vprint(variant_name, "Would remove: %s\n", pup_dir.c_str()); } else { if (opts.verbose) { - vprint(variant_name, "Removing: %s\n", pup_dir.string().c_str()); + vprint(variant_name, "Removing: %s\n", pup_dir.c_str()); } - std::filesystem::remove_all(pup_dir); + (void)pup::platform::remove_all(pup_dir); } } - auto config_path = ctx->build_dir / "tup.config"; - if (std::filesystem::exists(config_path)) { + auto config_path = pup::path::join(ctx->build_dir, "tup.config"); + if (pup::platform::exists(config_path)) { if (opts.dry_run) { - vprint(variant_name, "Would remove: %s\n", config_path.string().c_str()); + vprint(variant_name, "Would remove: %s\n", config_path.c_str()); } else { if (opts.verbose) { - vprint(variant_name, "Removing: %s\n", config_path.string().c_str()); + vprint(variant_name, "Removing: %s\n", config_path.c_str()); } - std::filesystem::remove(config_path); + (void)pup::platform::remove_file(config_path); } } - output_dirs.insert(ctx->build_dir); + output_dirs.push_back(ctx->build_dir); remove_empty_directories(output_dirs, ctx->build_dir, ctx->root, mode); if (!opts.dry_run) { diff --git a/src/cli/cmd_configure.cpp b/src/cli/cmd_configure.cpp index 9fc6cea..c76ed62 100644 --- a/src/cli/cmd_configure.cpp +++ b/src/cli/cmd_configure.cpp @@ -9,11 +9,10 @@ #include "pup/core/path_utils.hpp" #include "pup/exec/scheduler.hpp" #include "pup/graph/dag.hpp" +#include "pup/platform/file_io.hpp" #include #include -#include -#include namespace pup::cli { @@ -25,21 +24,21 @@ auto install_config_file( std::string_view variant_name 
) -> int { - auto config_path = std::filesystem::path { config_file }; - if (config_path.is_relative()) { - config_path = std::filesystem::current_path() / config_path; + auto config_path = std::string { config_file }; + if (!pup::path::is_absolute(config_path)) { + config_path = pup::path::join(*pup::platform::current_directory(), config_path); } - if (!std::filesystem::exists(config_path)) { - fprintf(stderr, "[%.*s] Error: Config file not found: %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.string().c_str()); + if (!pup::platform::exists(config_path)) { + fprintf(stderr, "[%.*s] Error: Config file not found: %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.c_str()); return EXIT_FAILURE; } - auto dest = layout.output_root / "tup.config"; - std::filesystem::create_directories(dest.parent_path()); - std::filesystem::copy_file(config_path, dest, std::filesystem::copy_options::overwrite_existing); + auto dest = pup::path::join(layout.output_root, "tup.config"); + (void)pup::platform::create_directories(std::string { pup::path::parent(dest) }); + (void)pup::platform::copy_file(config_path, dest); - printf("[%.*s] Installed %s -> %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.string().c_str(), dest.string().c_str()); + printf("[%.*s] Installed %s -> %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.c_str(), dest.c_str()); return EXIT_SUCCESS; } @@ -83,12 +82,11 @@ auto configure_single_variant( // Helper to ensure tup.config exists for variant detection (only on success) auto ensure_config = [&]() { - auto config_path = ctx.layout().output_root / "tup.config"; - if (!std::filesystem::exists(config_path)) { - std::filesystem::create_directories(config_path.parent_path()); - auto ofs = std::ofstream { config_path }; - ofs.close(); - printf("[%.*s] Created %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.string().c_str()); + auto config_path = 
pup::path::join(ctx.layout().output_root, "tup.config"); + if (!pup::platform::exists(config_path)) { + (void)pup::platform::create_directories(std::string { pup::path::parent(config_path) }); + (void)pup::platform::write_file(config_path, ""); + printf("[%.*s] Created %s\n", static_cast(variant_name.size()), variant_name.data(), config_path.c_str()); } }; @@ -100,14 +98,14 @@ auto configure_single_variant( } // Filter config commands by scope if specified - auto config_commands = std::set {}; + auto config_commands = pup::NodeIdMap32 {}; for (auto const& cfg : configs) { auto const* node = ctx.graph().get_command_node(cfg.cmd_id); auto source_dir_sv = node ? pup::graph::get_source_dir(ctx.graph().graph(), cfg.cmd_id) : std::string_view {}; if (!scopes.empty() && node && !pup::is_path_in_any_scope(std::string { source_dir_sv }, scopes)) { continue; } - config_commands.insert(cfg.cmd_id); + config_commands.set(cfg.cmd_id, 1); if (opts.verbose) { auto display_sv = node ? pup::graph::get_display_str(ctx.graph().graph(), cfg.cmd_id) : std::string_view { "" }; printf("[%.*s] Config rule: %.*s -> %s\n", static_cast(variant_name.size()), variant_name.data(), static_cast(display_sv.size()), display_sv.data(), cfg.output_path.c_str()); diff --git a/src/cli/cmd_parse.cpp b/src/cli/cmd_parse.cpp index 79073bc..b2dd22b 100644 --- a/src/cli/cmd_parse.cpp +++ b/src/cli/cmd_parse.cpp @@ -5,6 +5,7 @@ #include "pup/cli/context.hpp" #include "pup/cli/multi_variant.hpp" #include "pup/core/layout.hpp" +#include "pup/core/path.hpp" #include "pup/core/types.hpp" #include "pup/graph/dag.hpp" @@ -37,13 +38,13 @@ auto parse_single_variant(Options const& opts, std::string_view variant_name) -> auto& ctx = *result; if (opts.verbose) { - printf("[%.*s] Project root: \"%s\"\n", static_cast(variant_name.size()), variant_name.data(), ctx.layout().source_root.string().c_str()); + printf("[%.*s] Project root: \"%s\"\n", static_cast(variant_name.size()), variant_name.data(), 
ctx.layout().source_root.c_str()); printf("[%.*s] Tupfiles:\n", static_cast(variant_name.size()), variant_name.data()); for (auto const& dir : ctx.parsed_dirs()) { auto tupfile_path = (dir == "." || dir.empty()) - ? ctx.layout().source_root / "Tupfile" - : ctx.layout().source_root / dir / "Tupfile"; - printf("[%.*s] %s\n", static_cast(variant_name.size()), variant_name.data(), tupfile_path.string().c_str()); + ? pup::path::join(ctx.layout().source_root, "Tupfile") + : pup::path::join(pup::path::join(ctx.layout().source_root, dir), "Tupfile"); + printf("[%.*s] %s\n", static_cast(variant_name.size()), variant_name.data(), tupfile_path.c_str()); } } diff --git a/src/cli/cmd_show.cpp b/src/cli/cmd_show.cpp index e3a3c10..e3d59e0 100644 --- a/src/cli/cmd_show.cpp +++ b/src/cli/cmd_show.cpp @@ -6,6 +6,8 @@ #include "pup/cli/multi_variant.hpp" #include "pup/cli/output.hpp" #include "pup/core/layout.hpp" +#include "pup/core/node_id_map.hpp" +#include "pup/core/path.hpp" #include "pup/core/string_utils.hpp" #include "pup/core/types.hpp" #include "pup/graph/dag.hpp" @@ -21,9 +23,7 @@ #include #include #include -#include #include -#include namespace pup::cli { @@ -39,7 +39,7 @@ auto load_index_for_all_deps( } auto index_path = layout.index_path(); - if (!std::filesystem::exists(index_path)) { + if (!pup::platform::exists(index_path)) { fprintf(stderr, "Warning: No index found - run 'putup' first\n"); return std::nullopt; } @@ -132,7 +132,7 @@ auto cmd_export_script(Options const& opts, std::string_view variant_name) -> in printf("%.*s\n\n", static_cast(script_prologue.size()), script_prologue.data()); - auto output_dirs = std::set {}; + auto output_dirs = std::vector {}; for (auto id : ctx.graph().all_nodes()) { auto const* node = ctx.graph().get_file_node(id); if (!node) { @@ -150,11 +150,11 @@ auto cmd_export_script(Options const& opts, std::string_view variant_name) -> in auto inputs = ctx.graph().get_inputs(id); for (auto input_id : inputs) { if 
(node_id::is_command(input_id)) { - auto path = std::filesystem::path { node_path }; - if (path.has_parent_path()) { - auto parent = path.parent_path().generic_string(); + auto path = std::string { node_path }; + if (!pup::path::parent(path).empty()) { + auto parent = std::string { pup::path::parent(path) }; if (!parent.empty() && parent != ".") { - output_dirs.insert(parent); + output_dirs.push_back(std::move(parent)); } } break; @@ -162,6 +162,9 @@ auto cmd_export_script(Options const& opts, std::string_view variant_name) -> in } } + std::ranges::sort(output_dirs); + output_dirs.erase(std::unique(output_dirs.begin(), output_dirs.end()), output_dirs.end()); + if (!output_dirs.empty()) { printf("%.*s Create output directories\n", static_cast(script_comment.size()), script_comment.data()); for (auto const& dir : output_dirs) { @@ -252,9 +255,9 @@ auto cmd_export_graph(Options const& opts, std::string_view variant_name) -> int printf("digraph G {\n"); printf(" rankdir=LR;\n"); - auto declared_nodes = std::unordered_set {}; + auto declared_nodes = pup::NodeIdMap32 {}; for (auto id : ctx.graph().all_nodes()) { - declared_nodes.insert(id); + declared_nodes.set(id, 1); auto get_label = [&]() -> std::string { if (node_id::is_command(id)) { @@ -283,7 +286,7 @@ auto cmd_export_graph(Options const& opts, std::string_view variant_name) -> int } if (index) { - auto implicit_nodes = std::unordered_set {}; + auto implicit_nodes = pup::NodeIdMap32 {}; for (auto const& edge : index->edges()) { if (edge.type != pup::LinkType::Implicit) { @@ -293,13 +296,13 @@ auto cmd_export_graph(Options const& opts, std::string_view variant_name) -> int auto from_id = edge.from; auto to_id = edge.to; - if (declared_nodes.find(to_id) == declared_nodes.end()) { + if (!declared_nodes.contains(to_id)) { continue; } - if (declared_nodes.find(from_id) == declared_nodes.end()) { - if (implicit_nodes.find(from_id) == implicit_nodes.end()) { - implicit_nodes.insert(from_id); + if 
(!declared_nodes.contains(from_id)) { + if (!implicit_nodes.contains(from_id)) { + implicit_nodes.set(from_id, 1); auto const* file = index->find_file_by_id(from_id); auto label = file ? escape_dot_label(file->path) : format_node_id(from_id); printf(" %s [label=\"%s\" style=filled fillcolor=\"#f0f0f0\"];\n", format_node_id(from_id).c_str(), label.c_str()); @@ -372,18 +375,18 @@ auto cmd_export_compdb(Options const& opts, std::string_view variant_name) -> in auto source_dir_sv = graph::get_source_dir(ctx.graph().graph(), id); auto working_dir = ctx.layout().source_root; if (!source_dir_sv.empty()) { - working_dir /= source_dir_sv; + working_dir = pup::path::join(working_dir, std::string { source_dir_sv }); } // Convert project-root-relative paths to working-dir-relative - auto source_abs = ctx.layout().source_root / source_file; - auto source_rel = std::filesystem::relative(source_abs, working_dir).generic_string(); + auto source_abs = pup::path::join(ctx.layout().source_root, source_file); + auto source_rel = pup::path::relative(source_abs, working_dir); auto output_rel = std::string {}; if (!output_file.empty()) { // Generated files exist at output_root - auto output_abs = ctx.layout().output_root / output_file; - output_rel = std::filesystem::relative(output_abs, working_dir).generic_string(); + auto output_abs = pup::path::join(ctx.layout().output_root, output_file); + output_rel = pup::path::relative(output_abs, working_dir); } auto cmd_sv = graph::expand_instruction(ctx.graph().graph(), id); @@ -398,7 +401,7 @@ auto cmd_export_compdb(Options const& opts, std::string_view variant_name) -> in first = false; printf(" {\n"); - printf(" \"directory\": \"%s\",\n", escape_json(working_dir.string()).c_str()); + printf(" \"directory\": \"%s\",\n", escape_json(working_dir).c_str()); printf(" \"arguments\": ["); for (std::size_t i = 0; i < args.size(); ++i) { diff --git a/src/cli/config_commands.cpp b/src/cli/config_commands.cpp index 7f06b52..d605497 100644 --- 
a/src/cli/config_commands.cpp +++ b/src/cli/config_commands.cpp @@ -2,8 +2,8 @@ // Copyright (c) 2024 Putup authors #include "pup/cli/config_commands.hpp" - -#include +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" namespace pup::cli { @@ -18,7 +18,7 @@ auto is_config_output(std::string const& path) -> bool auto find_config_commands( graph::BuildGraph const& graph, - std::filesystem::path const& source_root + std::string const& source_root ) -> std::vector { auto result = std::vector {}; @@ -36,11 +36,11 @@ auto find_config_commands( auto path = graph.get_full_path(output_id); if (is_config_output(path)) { // Paths in graph are project-root-relative - auto full_path = source_root / path; + auto full_path = pup::path::join(source_root, path); result.push_back({ .cmd_id = id, .output_path = path, - .exists = std::filesystem::exists(full_path), + .exists = pup::platform::exists(full_path), }); } } @@ -50,11 +50,25 @@ auto find_config_commands( auto collect_command_dependencies( graph::BuildGraph const& graph, - std::set const& commands -) -> std::set + NodeIdMap32 const& commands +) -> NodeIdMap32 { - auto result = std::set { commands }; - auto worklist = std::vector { commands.begin(), commands.end() }; + auto result = NodeIdMap32 {}; + auto worklist = std::vector {}; + + auto try_add = [&](NodeId id) { + if (!result.contains(id)) { + result.set(id, 1); + worklist.push_back(id); + } + }; + + // Seed with initial commands — iterate all graph nodes, filter by membership + for (auto id : graph.all_nodes()) { + if (node_id::is_command(id) && commands.contains(id)) { + try_add(id); + } + } while (!worklist.empty()) { auto cmd_id = worklist.back(); @@ -62,17 +76,13 @@ auto collect_command_dependencies( for (auto input_id : graph.get_inputs(cmd_id)) { if (node_id::is_command(input_id)) { - if (result.insert(input_id).second) { - worklist.push_back(input_id); - } + try_add(input_id); continue; } for (auto producer_id : graph.get_inputs(input_id)) { if 
(node_id::is_command(producer_id)) { - if (result.insert(producer_id).second) { - worklist.push_back(producer_id); - } + try_add(producer_id); } } } @@ -80,9 +90,7 @@ auto collect_command_dependencies( auto add_producers = [&](NodeId file_id) { for (auto producer_id : graph.get_inputs(file_id)) { if (node_id::is_command(producer_id)) { - if (result.insert(producer_id).second) { - worklist.push_back(producer_id); - } + try_add(producer_id); } } }; diff --git a/src/cli/context.cpp b/src/cli/context.cpp index 97ff14b..3cd6220 100644 --- a/src/cli/context.cpp +++ b/src/cli/context.cpp @@ -4,6 +4,7 @@ #include "pup/cli/context.hpp" #include "pup/core/layout.hpp" #include "pup/core/metrics.hpp" +#include "pup/core/path.hpp" #include "pup/core/path_utils.hpp" #include "pup/core/platform.hpp" #include "pup/graph/builder.hpp" @@ -16,12 +17,11 @@ #include "pup/parser/parser.hpp" #include +#include #include #include #include -#include #include -#include #include namespace pup::cli { @@ -52,7 +52,7 @@ auto compute_build_scopes( } // Compute scope from current working directory - auto cwd = std::filesystem::current_path(); + auto cwd = *pup::platform::current_directory(); auto const& source_root = layout.source_root; // If cwd is source_root, build all @@ -81,72 +81,99 @@ auto compute_build_scopes( namespace { // Returns empty path for root-equivalent paths ("" or "."), otherwise unchanged -auto normalize_to_empty(std::filesystem::path const& p) -> std::filesystem::path +auto normalize_to_empty(std::string const& p) -> std::string { - return (p.empty() || p == ".") ? std::filesystem::path {} : p; + return (p.empty() || p == ".") ? std::string {} : p; } // Returns "." for root-equivalent paths ("" or "."), otherwise unchanged -auto normalize_to_dot(std::filesystem::path const& p) -> std::filesystem::path +auto normalize_to_dot(std::string const& p) -> std::string { - return (p.empty() || p == ".") ? std::filesystem::path { "." } : p; + return (p.empty() || p == ".") ? 
std::string { "." } : p; } // Joins base/rel, but if rel is root-equivalent returns just base -auto join_path(std::filesystem::path const& base, std::filesystem::path const& rel) - -> std::filesystem::path +auto join_path(std::string const& base, std::string const& rel) + -> std::string { - return (rel.empty() || rel == ".") ? base : base / rel; + return (rel.empty() || rel == ".") ? base : pup::path::join(base, rel); +} + +auto sorted_contains(std::vector const& v, std::string const& key) -> bool +{ + return std::binary_search(v.begin(), v.end(), key); +} + +auto sorted_insert(std::vector& v, std::string const& key) -> void +{ + auto pos = std::lower_bound(v.begin(), v.end(), key); + if (pos == v.end() || *pos != key) { + v.insert(pos, key); + } +} + +auto sorted_erase(std::vector& v, std::string const& key) -> void +{ + auto pos = std::lower_bound(v.begin(), v.end(), key); + assert(pos != v.end() && *pos == key); + v.erase(pos); } /// State for tracking Tupfile parsing across multiple directories struct TupfileParseState { - std::set available; - std::set parsed; - std::set parsing; - std::map parsed_configs; // Cache of parsed tup.config files (by path) - std::map scoped_configs; // Cache of merged per-dir configs + std::vector available; + std::vector parsed; + std::vector parsing; + std::map parsed_configs; // Cache of parsed tup.config files (by path) + std::map scoped_configs; // Cache of merged per-dir configs std::vector> const* config_defines = nullptr; // CLI overrides }; auto compute_tup_variantdir( - std::filesystem::path const& source_dir, - std::filesystem::path const& source_root, - std::filesystem::path const& output_root + std::string const& source_dir, + std::string const& source_root, + std::string const& output_root ) -> std::string { if (!output_root.empty() && source_root != output_root) { - auto output_dir = output_root / source_dir; - auto src_dir = source_root / source_dir; + auto output_dir = pup::path::join(output_root, source_dir); + 
auto src_dir = pup::path::join(source_root, source_dir); // Canonicalize to resolve symlinks — commands run from physical paths, // so the relative path must work from the physical location. - auto src_canonical = std::filesystem::weakly_canonical(src_dir); - auto out_canonical = std::filesystem::weakly_canonical(output_dir); - auto rel = std::filesystem::relative(out_canonical, src_canonical); - return rel.generic_string(); + auto src_canonical = pup::platform::canonical(src_dir); + auto out_canonical = pup::platform::canonical(output_dir); + if (src_canonical && out_canonical) { + return pup::path::relative(*out_canonical, *src_canonical); + } + return "."; } return "."; } auto find_build_subdir( - std::filesystem::path const& root -) -> std::optional + std::string const& root +) -> std::optional { for (auto const& name : { "build", "out", "variant" }) { - auto dir = std::filesystem::path { root / name }; - if (std::filesystem::exists(dir / "tup.config") - || std::filesystem::is_directory(dir / ".pup")) { + auto dir = std::string { pup::path::join(root, name) }; + if (pup::platform::exists(pup::path::join(dir, "tup.config")) + || pup::platform::is_directory(pup::path::join(dir, ".pup"))) { return dir; } } - if (std::filesystem::is_directory(root)) { - for (auto const& entry : std::filesystem::directory_iterator(root)) { - if (entry.is_directory()) { - if (std::filesystem::exists(entry.path() / "tup.config") - || std::filesystem::is_directory(entry.path() / ".pup")) { - return entry.path(); + if (pup::platform::is_directory(root)) { + auto entries = pup::platform::read_directory(root); + if (entries) { + for (auto const& entry : *entries) { + if (!entry.is_dir) { + continue; + } + auto entry_path = pup::path::join(root, entry.name); + if (pup::platform::exists(pup::path::join(entry_path, "tup.config")) + || pup::platform::is_directory(pup::path::join(entry_path, ".pup"))) { + return entry_path; } } } @@ -155,54 +182,41 @@ auto find_build_subdir( return 
std::nullopt; } -auto read_file(std::filesystem::path const& path) -> std::optional +auto read_file(std::string const& path) -> std::optional { - auto file = std::ifstream { path }; - if (!file) { + auto result = pup::platform::read_file(path); + if (!result) { return std::nullopt; } - - auto ss = std::stringstream {}; - ss << file.rdbuf(); - return ss.str(); + return std::move(*result); } auto discover_tupfile_dirs( - std::filesystem::path const& root, + std::string const& root, pup::parser::IgnoreList const& ignore = {} -) -> std::set +) -> std::vector { - auto dirs = std::set {}; - auto ec = std::error_code {}; - auto options = std::filesystem::directory_options::skip_permission_denied; - - for (auto it = std::filesystem::recursive_directory_iterator(root, options, ec); - it != std::filesystem::recursive_directory_iterator(); - ++it) { - if (ec) { - break; - } + auto dirs = std::vector {}; - auto const& entry = *it; - auto rel = std::filesystem::relative(entry.path(), root); + if (pup::platform::exists(pup::path::join(root, "Tupfile"))) { + dirs.push_back("."); + } - if (entry.is_directory() && ignore.is_ignored(rel)) { - it.disable_recursion_pending(); - continue; + (void)pup::platform::walk_directory(root, [&](pup::platform::DirEntry const& entry, std::string const& rel_path) -> bool { + if (entry.is_dir && ignore.is_ignored(rel_path)) { + return false; } - if (!entry.is_regular_file()) { - continue; - } - if (entry.path().filename() != "Tupfile") { - continue; + if (!entry.is_dir && entry.name == "Tupfile") { + auto dir_rel = std::string { pup::path::parent(rel_path) }; + dirs.push_back(normalize_to_dot(dir_rel)); } - auto dir = std::filesystem::path { entry.path().parent_path() }; - auto dir_rel = std::filesystem::relative(dir, root); - dirs.insert(normalize_to_dot(dir_rel)); - } + return true; + }); + std::sort(dirs.begin(), dirs.end()); + dirs.erase(std::unique(dirs.begin(), dirs.end()), dirs.end()); return dirs; } @@ -223,7 +237,7 @@ auto 
apply_config_overrides( /// Parse a tup.config file, returning a cached result on repeat calls. auto get_or_parse_config( - std::filesystem::path const& path, + std::string const& path, TupfileParseState& state ) -> parser::VarDb const* { @@ -233,7 +247,7 @@ auto get_or_parse_config( auto result = parser::parse_config(path); if (!result) { - fprintf(stderr, "Warning: Failed to parse %s: %s\n", path.string().c_str(), result.error().message.c_str()); + fprintf(stderr, "Warning: Failed to parse %s: %s\n", path.c_str(), result.error().message.c_str()); return nullptr; } @@ -246,8 +260,8 @@ auto get_or_parse_config( /// model as Tuprules.tup ?= defaults). /// Returns pointer to the cached VarDb for that directory. auto find_config_for_dir( - std::filesystem::path const& rel_dir, - std::filesystem::path const& output_root, + std::string const& rel_dir, + std::string const& output_root, TupfileParseState& state ) -> parser::VarDb const* { @@ -259,19 +273,26 @@ auto find_config_for_dir( } // Collect all tup.config paths from root down to target directory - auto config_paths = std::vector {}; + auto config_paths = std::vector {}; - auto root_config = output_root / "tup.config"; - if (std::filesystem::exists(root_config)) { + auto root_config = pup::path::join(output_root, "tup.config"); + if (pup::platform::exists(root_config)) { config_paths.push_back(root_config); } if (!normalized.empty()) { auto accumulated = output_root; - for (auto const& component : normalized) { - accumulated /= component; - auto config_path = accumulated / "tup.config"; - if (std::filesystem::exists(config_path)) { + auto remaining = std::string_view { normalized }; + while (!remaining.empty()) { + auto slash = remaining.find('/'); + auto component = (slash == std::string_view::npos) ? remaining : remaining.substr(0, slash); + remaining = (slash == std::string_view::npos) ? 
std::string_view {} : remaining.substr(slash + 1); + if (component.empty()) { + continue; + } + accumulated = pup::path::join(accumulated, std::string { component }); + auto config_path = pup::path::join(accumulated, "tup.config"); + if (pup::platform::exists(config_path)) { config_paths.push_back(config_path); } } @@ -299,19 +320,19 @@ auto find_config_for_dir( return &it->second; } -auto make_circular_dep_error(std::filesystem::path const& dir) -> pup::Error +auto make_circular_dep_error(std::string const& dir) -> pup::Error { return pup::Error { pup::ErrorCode::CyclicDependency, - std::format("Circular Tupfile dependency: {}", dir.string()) + std::format("Circular Tupfile dependency: {}", dir) }; } -auto make_read_error(std::filesystem::path const& path) -> pup::Error +auto make_read_error(std::string const& path) -> pup::Error { return pup::Error { pup::ErrorCode::IoError, - std::format("Failed to read {}", path.string()) + std::format("Failed to read {}", path) }; } @@ -319,53 +340,53 @@ struct ParseContext { TupfileParseState& state; pup::graph::GraphBuilder& builder; pup::graph::BuildGraph& graph; - std::filesystem::path const& source_root; - std::filesystem::path const& config_root; - std::filesystem::path const& output_root; + std::string const& source_root; + std::string const& config_root; + std::string const& output_root; pup::parser::VarDb const& base_vars; bool verbose; bool root_config_only; VarAssignedCallback on_var_assigned; }; -auto parse_directory(std::filesystem::path const& rel_dir, ParseContext& ctx) -> pup::Result +auto parse_directory(std::string const& rel_dir, ParseContext& ctx) -> pup::Result { auto vars = pup::parser::VarDb { ctx.base_vars }; auto normalized_dir = normalize_to_dot(rel_dir); - if (ctx.state.parsed.contains(normalized_dir)) { + if (sorted_contains(ctx.state.parsed, normalized_dir)) { return {}; } - if (ctx.state.parsing.contains(normalized_dir)) { + if (sorted_contains(ctx.state.parsing, normalized_dir)) { return 
pup::unexpected(make_circular_dep_error(normalized_dir)); } - ctx.state.parsing.insert(normalized_dir); + sorted_insert(ctx.state.parsing, normalized_dir); // Tupfiles are found in config_root (may differ from source_root in 3-tree builds) - auto tupfile_path = join_path(ctx.config_root, normalize_to_empty(rel_dir)) / "Tupfile"; + auto tupfile_path = pup::path::join(join_path(ctx.config_root, normalize_to_empty(rel_dir)), "Tupfile"); if (ctx.verbose) { - printf("Parsing: %s\n", tupfile_path.string().c_str()); + printf("Parsing: %s\n", tupfile_path.c_str()); } auto source = read_file(tupfile_path); if (!source) { - ctx.state.parsing.erase(normalized_dir); + sorted_erase(ctx.state.parsing, normalized_dir); return pup::unexpected(make_read_error(tupfile_path)); } - auto parse_result = pup::parser::parse_tupfile(*source, tupfile_path.string()); + auto parse_result = pup::parser::parse_tupfile(*source, tupfile_path); if (!parse_result.success()) { - ctx.state.parsing.erase(normalized_dir); + sorted_erase(ctx.state.parsing, normalized_dir); for (auto const& err : parse_result.errors) { - fprintf(stderr, "%s:%u:%u: error: %s\n", tupfile_path.string().c_str(), err.location.line, err.location.column, err.message.c_str()); + fprintf(stderr, "%s:%u:%u: error: %s\n", tupfile_path.c_str(), err.location.line, err.location.column, err.message.c_str()); } return pup::make_error(pup::ErrorCode::ParseError, "Parse failed"); } - auto tup_cwd = normalized_dir.generic_string(); + auto tup_cwd = normalized_dir; // In the "overlay" model, Tupfiles from config_root are treated as if they // were in source_root. Commands run from source_root, so all relative paths @@ -382,20 +403,22 @@ auto parse_directory(std::filesystem::path const& rel_dir, ParseContext& ctx) -> // For variant builds: e.g., "../../build/coreutils" from source/coreutils/ auto tup_outdir = std::string { "." 
}; if (ctx.source_root != ctx.output_root) { - auto source_dir = std::filesystem::weakly_canonical(join_path(ctx.source_root, rel_dir_normalized)); - auto output_dir = std::filesystem::weakly_canonical(join_path(ctx.output_root, rel_dir_normalized)); - tup_outdir = std::filesystem::relative(output_dir, source_dir).generic_string(); + auto source_dir = pup::platform::canonical(join_path(ctx.source_root, rel_dir_normalized)); + auto output_dir = pup::platform::canonical(join_path(ctx.output_root, rel_dir_normalized)); + if (source_dir && output_dir) { + tup_outdir = pup::path::relative(*output_dir, *source_dir); + } } // Get the scoped config for this directory (walks up tree to find nearest tup.config) // When root_config_only is set (for configure pass), always use root config auto const* scoped_config = find_config_for_dir( - ctx.root_config_only ? std::filesystem::path {} : rel_dir, + ctx.root_config_only ? std::string {} : rel_dir, ctx.output_root, ctx.state ); - auto request_directory = [&](std::filesystem::path const& dir) -> pup::Result { + auto request_directory = [&](std::string const& dir) -> pup::Result { return parse_directory(dir, ctx); }; @@ -416,8 +439,8 @@ auto parse_directory(std::filesystem::path const& rel_dir, ParseContext& ctx) -> auto result = pup::Result { ctx.builder.add_tupfile(ctx.graph, parse_result.tupfile, eval_ctx) }; - ctx.state.parsing.erase(normalized_dir); - ctx.state.parsed.insert(normalized_dir); + sorted_erase(ctx.state.parsing, normalized_dir); + sorted_insert(ctx.state.parsed, normalized_dir); if (result) { ++pup::thread_metrics().tupfiles_parsed; @@ -429,27 +452,27 @@ auto parse_directory(std::filesystem::path const& rel_dir, ParseContext& ctx) -> auto try_auto_init(ProjectLayout const& layout) -> void { auto pup_dir = layout.pup_dir(); - if (std::filesystem::exists(pup_dir)) { + if (pup::platform::exists(pup_dir)) { return; } - if (!std::filesystem::exists(layout.source_root / "Tupfile.ini")) { + if 
(!pup::platform::exists(pup::path::join(layout.source_root, "Tupfile.ini"))) { return; } - std::filesystem::create_directories(pup_dir); - printf("Initialized pup in \"%s\"\n", pup_dir.string().c_str()); + (void)pup::platform::create_directories(pup_dir); + printf("Initialized pup in \"%s\"\n", pup_dir.c_str()); } struct IndexLoadResult { std::optional index; - std::unordered_map cached_env_vars; + std::vector> cached_env_vars; }; -auto load_old_index(std::filesystem::path const& output_root, bool verbose) -> IndexLoadResult +auto load_old_index(std::string const& output_root, bool verbose) -> IndexLoadResult { auto result = IndexLoadResult {}; - auto index_path = output_root / ".pup" / "index"; + auto index_path = pup::path::join(pup::path::join(output_root, ".pup"), "index"); - if (!std::filesystem::exists(index_path)) { + if (!pup::platform::exists(index_path)) { return result; } @@ -477,10 +500,12 @@ auto load_old_index(std::filesystem::path const& output_root, bool verbose) -> I auto key_value = std::string_view { file.path }.substr(ENV_VAR_DIR_PREFIX.size()); auto eq_pos = key_value.find('='); if (eq_pos != std::string::npos) { - result.cached_env_vars[std::string { key_value.substr(0, eq_pos) }] = std::string { key_value.substr(eq_pos + 1) }; + result.cached_env_vars.emplace_back(std::string { key_value.substr(0, eq_pos) }, std::string { key_value.substr(eq_pos + 1) }); } } + std::sort(result.cached_env_vars.begin(), result.cached_env_vars.end()); + if (verbose && !result.cached_env_vars.empty()) { printf("Loaded %zu cached env vars from index\n", result.cached_env_vars.size()); } @@ -488,10 +513,10 @@ auto load_old_index(std::filesystem::path const& output_root, bool verbose) -> I return result; } -auto sort_dirs_by_depth(std::set const& available) -> std::vector +auto sort_dirs_by_depth(std::vector const& available) -> std::vector { - auto root_rel = std::filesystem::path { "." 
}; - auto dirs = std::vector { available.begin(), available.end() }; + auto root_rel = std::string { "." }; + auto dirs = std::vector { available.begin(), available.end() }; std::ranges::sort(dirs, [&root_rel](auto const& a, auto const& b) { auto is_root_a = (a == root_rel); auto is_root_b = (b == root_rel); @@ -512,8 +537,8 @@ auto load_ignore_list(ProjectLayout const& layout, bool verbose) -> pup::parser: { auto ignore = pup::parser::IgnoreList::with_defaults(); for (auto const& root : { layout.config_root, layout.source_root }) { - auto ignore_path = root / ".pupignore"; - if (!std::filesystem::exists(ignore_path)) { + auto ignore_path = pup::path::join(root, ".pupignore"); + if (!pup::platform::exists(ignore_path)) { continue; } auto ignore_result = pup::parser::IgnoreList::load(ignore_path); @@ -522,7 +547,7 @@ auto load_ignore_list(ProjectLayout const& layout, bool verbose) -> pup::parser: } ignore = std::move(*ignore_result); if (verbose) { - printf("Loaded %zu ignore patterns from %s\n", ignore.size(), ignore_path.string().c_str()); + printf("Loaded %zu ignore patterns from %s\n", ignore.size(), ignore_path.c_str()); } break; } @@ -535,13 +560,13 @@ auto make_layout_options(Options const& opts) -> LayoutOptions { auto layout_opts = LayoutOptions {}; if (!opts.source_dir.empty()) { - layout_opts.source_dir = std::filesystem::path { opts.source_dir }; + layout_opts.source_dir = std::string { opts.source_dir }; } if (!opts.config_dir.empty()) { - layout_opts.config_dir = std::filesystem::path { opts.config_dir }; + layout_opts.config_dir = std::string { opts.config_dir }; } if (!opts.build_dirs.empty()) { - layout_opts.build_dir = std::filesystem::path { opts.build_dirs[0] }; + layout_opts.build_dir = std::string { opts.build_dirs[0] }; } return layout_opts; } @@ -591,7 +616,7 @@ auto BuildContext::vars() const -> parser::VarDb const& return impl_->vars; } -auto BuildContext::parsed_dirs() const -> std::set const& +auto BuildContext::parsed_dirs() const -> 
std::vector const& { return impl_->state.parsed; } @@ -617,8 +642,8 @@ auto build_context( // Early tup.config check (before expensive parsing) if (ctx_opts.require_config) { - auto config_path = ctx.impl_->layout.output_root / "tup.config"; - if (!std::filesystem::exists(config_path)) { + auto config_path = pup::path::join(ctx.impl_->layout.output_root, "tup.config"); + if (!pup::platform::exists(config_path)) { return make_error( ErrorCode::NotFound, "No tup.config found. Run 'pup configure' first." @@ -628,11 +653,10 @@ auto build_context( // Set build root name for variant builds (before parsing) if (ctx.impl_->layout.source_root != ctx.impl_->layout.output_root) { - auto build_root_name = std::filesystem::relative( - ctx.impl_->layout.output_root, - ctx.impl_->layout.source_root - ) - .generic_string(); + auto build_root_name = pup::path::relative( + ctx.impl_->layout.output_root, + ctx.impl_->layout.source_root + ); ctx.impl_->graph.set_build_root_name(std::move(build_root_name)); } @@ -654,13 +678,13 @@ auto build_context( } // 4. 
Load config (seeds the per-file parse cache for find_config_for_dir) - auto config_path = ctx.impl_->layout.output_root / "tup.config"; - if (std::filesystem::exists(config_path)) { + auto config_path = pup::path::join(ctx.impl_->layout.output_root, "tup.config"); + if (pup::platform::exists(config_path)) { auto const* root_cfg = get_or_parse_config(config_path, ctx.impl_->state); if (root_cfg) { ctx.impl_->config_vars = *root_cfg; if (ctx_opts.verbose) { - printf("Loaded %zu config variables from %s\n", ctx.impl_->config_vars.names().size(), config_path.string().c_str()); + printf("Loaded %zu config variables from %s\n", ctx.impl_->config_vars.names().size(), config_path.c_str()); } } } @@ -711,11 +735,11 @@ auto build_context( }; for (auto const& dir : sort_dirs_by_depth(ctx.impl_->state.available)) { - if (ctx.impl_->state.parsed.contains(dir)) { + if (sorted_contains(ctx.impl_->state.parsed, dir)) { continue; } if (!ctx_opts.parse_scopes.empty() - && !pup::is_path_in_any_scope(dir.generic_string(), ctx_opts.parse_scopes)) { + && !pup::is_path_in_any_scope(dir, ctx_opts.parse_scopes)) { continue; } auto result = Result { parse_directory(dir, parse_ctx) }; @@ -740,22 +764,22 @@ auto build_context( auto resolve_clean_context(Options const& opts) -> std::optional { - auto cwd = std::filesystem::current_path(); + auto cwd = *pup::platform::current_directory(); auto root = find_project_root(cwd); if (!root) { return std::nullopt; } - auto build_dir = std::filesystem::path {}; + auto build_dir = std::string {}; auto is_in_tree = false; if (!opts.build_dirs.empty()) { - build_dir = std::filesystem::path { opts.build_dirs[0] }; - if (build_dir.is_relative()) { - build_dir = *root / build_dir; + build_dir = std::string { opts.build_dirs[0] }; + if (!pup::path::is_absolute(build_dir)) { + build_dir = pup::path::join(*root, build_dir); } is_in_tree = (build_dir == *root); - } else if (std::filesystem::exists(cwd / ".pup") && cwd != *root) { + } else if 
(pup::platform::exists(pup::path::join(cwd, ".pup")) && cwd != *root) { // cwd contains .pup and is not source root - we're inside a build directory build_dir = cwd; is_in_tree = false; @@ -763,8 +787,8 @@ auto resolve_clean_context(Options const& opts) -> std::optional // Prefer build subdirectory with .pup/index over source root build_dir = *detected; is_in_tree = false; - } else if (std::filesystem::exists(*root / "tup.config") - || std::filesystem::exists(*root / ".pup")) { + } else if (pup::platform::exists(pup::path::join(*root, "tup.config")) + || pup::platform::exists(pup::path::join(*root, ".pup"))) { // Fall back to source root for in-tree builds build_dir = *root; is_in_tree = true; diff --git a/src/cli/multi_variant.cpp b/src/cli/multi_variant.cpp index 712f176..1619ae3 100644 --- a/src/cli/multi_variant.cpp +++ b/src/cli/multi_variant.cpp @@ -5,8 +5,10 @@ #include "pup/cli/context.hpp" #include "pup/cli/target.hpp" #include "pup/core/layout.hpp" +#include "pup/core/path.hpp" #include "pup/core/path_utils.hpp" #include "pup/core/result.hpp" +#include "pup/platform/file_io.hpp" #include #include @@ -19,14 +21,14 @@ namespace pup::cli { namespace { struct ParsedTargets { - std::vector variants; + std::vector variants; std::vector scopes; - std::vector output_targets; // Specific output file targets + std::vector output_targets; bool has_variant_targets = false; }; auto parse_targets_for_variants( - std::filesystem::path const& source_root, + std::string const& source_root, std::vector const& targets ) -> pup::Result { @@ -45,20 +47,19 @@ auto parse_targets_for_variants( for (auto const& target : *parsed) { if (target.variant.has_value()) { result.has_variant_targets = true; - variant_set.insert(target.variant->string()); + variant_set.insert(*target.variant); if (!target.scope_or_output.empty()) { if (target.is_output) { - // Store source-root-relative path (graph uses source-root-relative) - 
result.output_targets.push_back(target.scope_or_output.generic_string()); + result.output_targets.push_back(target.scope_or_output); } else { - result.scopes.push_back(target.scope_or_output.generic_string()); + result.scopes.push_back(target.scope_or_output); } } } else { if (target.is_output) { - result.output_targets.push_back(target.scope_or_output.generic_string()); + result.output_targets.push_back(target.scope_or_output); } else { - result.scopes.push_back(target.scope_or_output.generic_string()); + result.scopes.push_back(target.scope_or_output); } } } @@ -78,7 +79,6 @@ auto for_each_variant( std::string_view command_name ) -> int { - // Discover project layout auto layout_result = Result { discover_layout(make_layout_options(opts)) }; if (!layout_result) { fprintf(stderr, "Error: %s\n", layout_result.error().message.c_str()); @@ -87,15 +87,13 @@ auto for_each_variant( auto const& source_root = layout_result->source_root; - // Parse targets to extract variants and scopes auto parsed_targets = parse_targets_for_variants(source_root, opts.targets); if (!parsed_targets.has_value()) { fprintf(stderr, "Error: %s\n", parsed_targets.error().message.c_str()); return EXIT_FAILURE; } - // Determine variants to process - auto variants = std::vector {}; + auto variants = std::vector {}; auto scopes = std::vector {}; auto output_targets = std::vector {}; @@ -115,7 +113,6 @@ auto for_each_variant( output_targets = parsed_targets->output_targets; } - // No variants found - in-tree operation if (variants.empty()) { auto modified_opts = Options { opts }; modified_opts.targets = scopes; @@ -123,42 +120,35 @@ auto for_each_variant( return handler(modified_opts, "."); } - // Check if cwd is inside one of the discovered variants. - // If so, let discover_layout detect it via its cwd/tup.config logic - // instead of setting build_dirs (which would resolve relative to cwd). 
- auto cwd = std::filesystem::current_path(); - auto cwd_variant = std::optional {}; + auto cwd = *pup::platform::current_directory(); + auto cwd_variant = std::optional {}; for (auto const& variant : variants) { - auto variant_abs = source_root / variant; + auto variant_abs = pup::path::join(source_root, variant); if (pup::is_path_under(cwd, variant_abs)) { cwd_variant = variant; break; } } - // If cwd is inside a variant, use that variant without setting build_dirs if (cwd_variant) { auto single_opts = Options { opts }; - // Don't set build_dirs - let discover_layout detect via cwd/tup.config single_opts.targets = scopes; single_opts.output_targets = output_targets; - return handler(single_opts, cwd_variant->filename().string()); + return handler(single_opts, std::string { pup::path::filename(*cwd_variant) }); } - // Single variant - direct call if (variants.size() == 1) { auto single_opts = Options { opts }; - single_opts.build_dirs = { variants[0].string() }; + single_opts.build_dirs = { variants[0] }; single_opts.targets = scopes; single_opts.output_targets = output_targets; - return handler(single_opts, variants[0].filename().string()); + return handler(single_opts, std::string { pup::path::filename(variants[0]) }); } - // Multiple variants - parallel execution if (opts.verbose) { printf("%.*s %zu variants in parallel:\n", static_cast(command_name.size()), command_name.data(), variants.size()); for (auto const& v : variants) { - printf(" %s\n", v.string().c_str()); + printf(" %s\n", v.c_str()); } } @@ -168,15 +158,14 @@ auto for_each_variant( std::launch::async, [&opts, &handler, &scopes, &output_targets, variant = variant] { auto variant_opts = Options { opts }; - variant_opts.build_dirs = { variant.string() }; + variant_opts.build_dirs = { variant }; variant_opts.targets = scopes; variant_opts.output_targets = output_targets; - return handler(variant_opts, variant.filename().string()); + return handler(variant_opts, std::string { 
pup::path::filename(variant) }); } )); } - // Collect results auto failed = 0; for (auto& future : futures) { if (future.get() != 0) { diff --git a/src/cli/output.cpp b/src/cli/output.cpp index 2c08e3c..13a6b1b 100644 --- a/src/cli/output.cpp +++ b/src/cli/output.cpp @@ -2,6 +2,8 @@ // Copyright (c) 2024 Putup authors #include "pup/cli/output.hpp" +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" #include #include @@ -16,17 +18,19 @@ constexpr auto ASCII_CONTROL_CHAR_MAX = static_cast(0x1F); } auto remove_empty_directories( - std::set const& output_dirs, - std::filesystem::path const& build_dir, - std::filesystem::path const& source_dir, + std::vector const& output_dirs, + std::string const& build_dir, + std::string const& source_dir, OutputMode mode ) -> std::size_t { auto removed = std::size_t { 0 }; - auto dirs = std::vector(output_dirs.begin(), output_dirs.end()); + auto dirs = output_dirs; + std::ranges::sort(dirs); + dirs.erase(std::unique(dirs.begin(), dirs.end()), dirs.end()); std::ranges::sort(dirs, std::greater {}, [](auto const& p) { - return p.string().size(); + return p.size(); }); for (auto const& dir : dirs) { @@ -34,22 +38,22 @@ auto remove_empty_directories( continue; } - auto rel = std::filesystem::relative(dir, build_dir); - if (rel.string().starts_with("..")) { + auto rel = pup::path::relative(dir, build_dir); + if (rel.starts_with("..")) { continue; } - if (!std::filesystem::exists(dir) || !std::filesystem::is_empty(dir)) { + if (!pup::platform::exists(dir) || !pup::platform::is_empty(dir)) { continue; } if (mode.dry_run) { - printf("Would remove empty dir: %s\n", dir.string().c_str()); + printf("Would remove empty dir: %s\n", dir.c_str()); } else { - std::filesystem::remove(dir); + (void)pup::platform::remove_file(dir); ++removed; if (mode.verbose) { - printf("Removed empty dir: %s\n", dir.string().c_str()); + printf("Removed empty dir: %s\n", dir.c_str()); } } } diff --git a/src/cli/target.cpp b/src/cli/target.cpp index 
5efd3ea..2cb5c5a 100644 --- a/src/cli/target.cpp +++ b/src/cli/target.cpp @@ -2,29 +2,28 @@ // Copyright (c) 2024 Putup authors #include "pup/cli/target.hpp" +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" #include #include -namespace fs = std::filesystem; - namespace pup { namespace { -auto is_source_file(fs::path const& path) -> bool +auto is_source_file(std::string const& p) -> bool { - auto const ext = path.extension().string(); + auto ext = std::string { pup::path::extension(p) }; static auto const source_exts = std::set { ".c", ".cc", ".cpp", ".cxx", ".C", ".h", ".hh", ".hpp", ".hxx", ".H", ".s", ".S", ".asm" }; return source_exts.contains(ext); } -auto is_variant_dir(fs::path const& dir) -> bool +auto is_variant_dir(std::string const& dir) -> bool { - auto config_path = dir / "tup.config"; - return fs::exists(config_path); + return pup::platform::exists(pup::path::join(dir, "tup.config")); } auto fnmatch_simple(std::string const& pattern, std::string const& name) -> bool @@ -44,10 +43,19 @@ auto fnmatch_simple(std::string const& pattern, std::string const& name) -> bool return name.starts_with(prefix) && name.ends_with(suffix); } +auto split_first_component(std::string const& p) -> std::pair +{ + auto slash = p.find('/'); + if (slash == std::string::npos) { + return { p, {} }; + } + return { p.substr(0, slash), p.substr(slash + 1) }; +} + } // namespace auto parse_target( - fs::path const& project_root, + std::string const& project_root, std::string const& target_path ) -> Result { @@ -55,52 +63,40 @@ auto parse_target( return unexpected { Error { ErrorCode::InvalidArgument, "empty target path" } }; } - auto full_path = project_root / target_path; + auto full_path = pup::path::join(project_root, target_path); auto target = Target {}; - auto path = fs::path { target_path }; - auto first_component = *path.begin(); - auto variant_path = project_root / first_component; + auto [first_component, remainder] = split_first_component(target_path); + 
+ auto variant_path = pup::path::join(project_root, first_component); if (is_variant_dir(variant_path)) { target.variant = first_component; - - auto remainder = fs::path {}; - auto it = path.begin(); - ++it; - for (; it != path.end(); ++it) { - remainder /= *it; - } - target.scope_or_output = remainder; - full_path = variant_path / remainder; + full_path = pup::path::join(variant_path, remainder); } else { - target.scope_or_output = path; + target.scope_or_output = target_path; } - if (fs::exists(full_path)) { - if (fs::is_regular_file(full_path)) { + if (pup::platform::exists(full_path)) { + if (pup::platform::is_file(full_path)) { if (is_source_file(full_path)) { return unexpected { Error { ErrorCode::InvalidArgument, "source file, not build output: " + target_path } }; } - target.is_output = true; } } else { - // Path doesn't exist - check if parent exists (output target for from-scratch build) - auto parent = full_path.parent_path(); - if (parent.empty()) { - parent = project_root; + auto par = std::string { pup::path::parent(full_path) }; + if (par.empty()) { + par = project_root; } - if (!fs::exists(parent)) { + if (!pup::platform::exists(par)) { return unexpected { Error { ErrorCode::NotFound, "path not found: " + target_path } }; } - // Reject source file extensions even for non-existent files if (is_source_file(full_path)) { return unexpected { Error { ErrorCode::InvalidArgument, "source file, not build output: " + target_path } }; } - // Parent exists - assume output file (validate in cmd_build.cpp after graph is built) target.is_output = true; } @@ -108,18 +104,17 @@ auto parse_target( } auto expand_glob_target( - fs::path const& project_root, + std::string const& project_root, std::string const& pattern ) -> std::vector { auto result = std::vector {}; - auto path = fs::path { pattern }; - if (path.empty()) { + if (pattern.empty()) { return result; } - auto first_component = path.begin()->string(); + auto [first_component, remainder] = 
split_first_component(pattern); auto has_glob = first_component.find('*') != std::string::npos; if (!has_glob) { @@ -130,40 +125,36 @@ auto expand_glob_target( return result; } - auto remainder = fs::path {}; - auto it = path.begin(); - ++it; - for (; it != path.end(); ++it) { - remainder /= *it; + auto entries = pup::platform::read_directory(project_root); + if (!entries) { + return result; } - std::error_code ec; - for (auto const& entry : fs::directory_iterator(project_root, ec)) { - if (!entry.is_directory()) { + for (auto const& entry : *entries) { + if (!entry.is_dir) { continue; } - auto name = entry.path().filename().string(); - if (!fnmatch_simple(first_component, name)) { + if (!fnmatch_simple(first_component, entry.name)) { continue; } - if (!is_variant_dir(entry.path())) { + auto entry_path = pup::path::join(project_root, entry.name); + if (!is_variant_dir(entry_path)) { continue; } auto target = Target {}; - target.variant = entry.path().filename(); + target.variant = entry.name; if (!remainder.empty()) { - auto full_path = entry.path() / remainder; + auto full_path = pup::path::join(entry_path, remainder); target.scope_or_output = remainder; - if (fs::is_regular_file(full_path)) { + if (pup::platform::is_file(full_path)) { target.is_output = true; - } else if (!fs::is_directory(full_path) && !is_source_file(full_path)) { - // Non-existent file that's not a source - assume output (validate later) - auto parent = full_path.parent_path(); - if (!parent.empty() && fs::exists(parent)) { + } else if (!pup::platform::is_directory(full_path) && !is_source_file(full_path)) { + auto par = std::string { pup::path::parent(full_path) }; + if (!par.empty() && pup::platform::exists(par)) { target.is_output = true; } } @@ -185,12 +176,11 @@ auto is_glob_pattern(std::string const& s) -> bool } // namespace auto validate_target_consistency( - fs::path const& project_root, + std::string const& project_root, std::vector const& targets ) -> Result> { auto result = 
std::vector {}; - auto has_variant = std::optional {}; for (auto const& target_str : targets) { diff --git a/src/core/arena.cpp b/src/core/arena.cpp new file mode 100644 index 0000000..5137cbf --- /dev/null +++ b/src/core/arena.cpp @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "pup/core/arena.hpp" + +#include +#include +#include +#include + +namespace pup { + +Arena32::~Arena32() +{ + std::free(data_); +} + +Arena32::Arena32(Arena32&& other) noexcept + : data_(std::exchange(other.data_, nullptr)) + , size_(std::exchange(other.size_, 0)) + , capacity_(std::exchange(other.capacity_, 0)) +{ +} + +auto Arena32::operator=(Arena32&& other) noexcept -> Arena32& +{ + if (this != &other) { + std::free(data_); + data_ = std::exchange(other.data_, nullptr); + size_ = std::exchange(other.size_, 0); + capacity_ = std::exchange(other.capacity_, 0); + } + return *this; +} + +auto Arena32::grow(std::size_t needed) -> void +{ + if (needed <= capacity_) { + return; + } + auto new_cap = capacity_ == 0 ? 
std::size_t { 16 } : capacity_ * 2; + while (new_cap < needed) { + new_cap *= 2; + } + auto* p = static_cast(std::realloc(data_, new_cap * sizeof(std::uint32_t))); + if (!p) { + std::abort(); + } + data_ = p; + capacity_ = new_cap; +} + +auto Arena32::append(std::uint32_t const* values, std::uint32_t count) -> ArenaSlice +{ + if (count == 0) { + return ArenaSlice { static_cast(size_), 0 }; + } + auto const needed = size_ + count; + if (needed > capacity_) { + grow(needed); + } + auto const offset = static_cast(size_); + if (values) { + std::memcpy(data_ + size_, values, count * sizeof(std::uint32_t)); + } else { + std::memset(data_ + size_, 0, count * sizeof(std::uint32_t)); + } + size_ += count; + return ArenaSlice { offset, count }; +} + +auto Arena32::get(ArenaSlice slice) const -> std::uint32_t const* +{ + if (slice.length == 0) { + return nullptr; + } + return data_ + slice.offset; +} + +auto Arena32::slice(ArenaSlice s) const -> Span +{ + if (s.length == 0) { + return { nullptr, 0 }; + } + return { data_ + s.offset, s.length }; +} + +auto Arena32::at(std::uint32_t offset) -> std::uint32_t& +{ + assert(offset < size_); + return data_[offset]; +} + +auto Arena32::append_extend(ArenaSlice old, std::uint32_t new_value) -> ArenaSlice +{ + auto new_len = old.length + 1; + auto needed = size_ + new_len; + if (needed > capacity_) { + grow(needed); + } + auto new_offset = static_cast(size_); + if (old.length > 0) { + std::memcpy(data_ + new_offset, data_ + old.offset, old.length * sizeof(std::uint32_t)); + } + data_[new_offset + old.length] = new_value; + size_ += new_len; + return ArenaSlice { new_offset, new_len }; +} + +auto Arena32::size() const -> std::size_t +{ + return size_; +} + +auto Arena32::reserve(std::size_t total_elements) -> void +{ + if (total_elements > capacity_) { + grow(total_elements); + } +} + +auto Arena32::compact() -> void +{ + if (size_ == capacity_ || size_ == 0) { + if (size_ == 0 && data_) { + std::free(data_); + data_ = nullptr; + 
capacity_ = 0; + } + return; + } + auto* p = static_cast(std::realloc(data_, size_ * sizeof(std::uint32_t))); + if (p) { + data_ = p; + capacity_ = size_; + } +} + +auto Arena32::clear() -> void +{ + size_ = 0; +} + +} // namespace pup diff --git a/src/core/hash.cpp b/src/core/hash.cpp index 5c88bb8..ba73cba 100644 --- a/src/core/hash.cpp +++ b/src/core/hash.cpp @@ -8,8 +8,15 @@ extern "C" { #include "sha256/sha256.h" } +#include #include -#include + +#ifdef _WIN32 +# include +#else +# include +# include +#endif namespace pup { @@ -80,27 +87,59 @@ auto sha256(std::string_view data) -> Hash256 return sha256_finalize(state); } -auto sha256_file(std::filesystem::path const& path) -> Result +auto sha256_file(std::string const& path) -> Result { ++thread_metrics().hash_computations; - auto file = std::ifstream { path, std::ios::binary }; - if (!file) { - return make_error(ErrorCode::IoError, "Failed to open file: " + path.string()); +#ifdef _WIN32 + auto wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path.data(), static_cast(path.size()), nullptr, 0); + if (wlen == 0) { + wlen = MultiByteToWideChar(CP_UTF8, 0, path.data(), static_cast(path.size()), nullptr, 0); + } + auto wpath = std::wstring(static_cast(wlen), L'\0'); + MultiByteToWideChar(CP_UTF8, 0, path.data(), static_cast(path.size()), wpath.data(), wlen); + + auto file = CreateFileW(wpath.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (file == INVALID_HANDLE_VALUE) { + return make_error(ErrorCode::IoError, "Failed to open file: " + path); } auto state = sha256_init(); - auto buffer = std::array {}; + char buffer[8192]; + auto bytes_read = DWORD {}; - while (file.read(buffer.data(), buffer.size()) || file.gcount() > 0) { - auto const bytes_read = static_cast(file.gcount()); - state = sha256_update(state, std::string_view { buffer.data(), bytes_read }); + while (ReadFile(file, buffer, sizeof(buffer), &bytes_read, nullptr) && bytes_read > 0) { + state 
= sha256_update(state, std::string_view { buffer, bytes_read }); } - if (file.bad()) { - return make_error(ErrorCode::IoError, "Error reading file: " + path.string()); + CloseHandle(file); +#else + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + auto fd = ::open(path.c_str(), O_RDONLY); + if (fd < 0) { + return make_error(ErrorCode::IoError, "Failed to open file: " + path); } + auto state = sha256_init(); + char buffer[8192]; + + while (true) { + auto n = ::read(fd, buffer, sizeof(buffer)); + if (n < 0) { + if (errno == EINTR) { + continue; + } + break; + } + if (n == 0) { + break; + } + state = sha256_update(state, std::string_view { buffer, static_cast(n) }); + } + + ::close(fd); +#endif + return sha256_finalize(state); } diff --git a/src/core/id_array.cpp b/src/core/id_array.cpp new file mode 100644 index 0000000..0bae8ba --- /dev/null +++ b/src/core/id_array.cpp @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "pup/core/id_array.hpp" + +#include +#include +#include + +namespace pup { + +// --- IdArray32 --- + +IdArray32::~IdArray32() +{ + std::free(data_); +} + +IdArray32::IdArray32(IdArray32&& other) noexcept + : data_(std::exchange(other.data_, nullptr)) + , capacity_(std::exchange(other.capacity_, 0)) + , present_(std::move(other.present_)) +{ +} + +auto IdArray32::operator=(IdArray32&& other) noexcept -> IdArray32& +{ + if (this != &other) { + std::free(data_); + data_ = std::exchange(other.data_, nullptr); + capacity_ = std::exchange(other.capacity_, 0); + present_ = std::move(other.present_); + } + return *this; +} + +auto IdArray32::resize(std::uint32_t max_id) -> void +{ + auto const needed = static_cast(max_id) + 1; + if (needed <= capacity_) { + return; + } + auto* p = static_cast(std::realloc(data_, needed * sizeof(std::uint32_t))); + if (!p) { + std::abort(); + } + std::memset(p + capacity_, 0, (needed - capacity_) * sizeof(std::uint32_t)); + data_ = p; + capacity_ = needed; + 
present_.resize(max_id); +} + +auto IdArray32::set(std::uint32_t id, std::uint32_t value) -> void +{ + if (static_cast(id) >= capacity_) { + resize(id); + } + data_[id] = value; + present_.insert(id); +} + +auto IdArray32::get(std::uint32_t id) const -> std::uint32_t +{ + if (static_cast(id) >= capacity_) { + return 0; + } + return data_[id]; +} + +auto IdArray32::contains(std::uint32_t id) const -> bool +{ + return present_.contains(id); +} + +auto IdArray32::remove(std::uint32_t id) -> void +{ + present_.remove(id); +} + +auto IdArray32::clear() -> void +{ + if (data_) { + std::memset(data_, 0, capacity_ * sizeof(std::uint32_t)); + } + present_.clear(); +} + +auto IdArray32::for_each(void (*fn)(std::uint32_t id, std::uint32_t value, void* ctx), void* ctx) const -> void +{ + struct Context { + std::uint32_t const* data; + void (*fn)(std::uint32_t, std::uint32_t, void*); + void* ctx; + }; + + auto inner = Context { data_, fn, ctx }; + present_.for_each( + [](std::uint32_t id, void* raw) { + auto const* c = static_cast(raw); + c->fn(id, c->data[id], c->ctx); + }, + &inner + ); +} + +} // namespace pup diff --git a/src/core/id_bitset.cpp b/src/core/id_bitset.cpp new file mode 100644 index 0000000..15b381c --- /dev/null +++ b/src/core/id_bitset.cpp @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "pup/core/id_bitset.hpp" + +#include +#include +#include + +namespace pup { + +IdBitSet::~IdBitSet() +{ + std::free(words_); +} + +IdBitSet::IdBitSet(IdBitSet&& other) noexcept + : words_(std::exchange(other.words_, nullptr)) + , word_count_(std::exchange(other.word_count_, 0)) +{ +} + +auto IdBitSet::operator=(IdBitSet&& other) noexcept -> IdBitSet& +{ + if (this != &other) { + std::free(words_); + words_ = std::exchange(other.words_, nullptr); + word_count_ = std::exchange(other.word_count_, 0); + } + return *this; +} + +auto IdBitSet::resize(std::uint32_t max_id) -> void +{ + auto const needed = static_cast(max_id / 64) + 
1; + if (needed <= word_count_) { + return; + } + auto* p = static_cast(std::realloc(words_, needed * sizeof(std::uint64_t))); + if (!p) { + std::abort(); + } + std::memset(p + word_count_, 0, (needed - word_count_) * sizeof(std::uint64_t)); + words_ = p; + word_count_ = needed; +} + +auto IdBitSet::insert(std::uint32_t id) -> void +{ + auto const word = static_cast(id / 64); + if (word >= word_count_) { + resize(id); + } + words_[word] |= std::uint64_t { 1 } << (id % 64); +} + +auto IdBitSet::remove(std::uint32_t id) -> void +{ + auto const word = static_cast(id / 64); + if (word >= word_count_) { + return; + } + words_[word] &= ~(std::uint64_t { 1 } << (id % 64)); +} + +auto IdBitSet::contains(std::uint32_t id) const -> bool +{ + auto const word = static_cast(id / 64); + if (word >= word_count_) { + return false; + } + return (words_[word] & (std::uint64_t { 1 } << (id % 64))) != 0; +} + +auto IdBitSet::clear() -> void +{ + if (words_) { + std::memset(words_, 0, word_count_ * sizeof(std::uint64_t)); + } +} + +auto IdBitSet::count() const -> std::size_t +{ + auto n = std::size_t { 0 }; + for (auto i = std::size_t { 0 }; i < word_count_; ++i) { + n += static_cast(__builtin_popcountll(words_[i])); + } + return n; +} + +auto IdBitSet::for_each(void (*fn)(std::uint32_t id, void* ctx), void* ctx) const -> void +{ + for (auto i = std::size_t { 0 }; i < word_count_; ++i) { + auto w = words_[i]; + while (w != 0) { + auto const bit = static_cast(__builtin_ctzll(w)); + fn(static_cast(i * 64) + bit, ctx); + w &= w - 1; + } + } +} + +} // namespace pup diff --git a/src/core/layout.cpp b/src/core/layout.cpp index ec69bc2..f1c68c6 100644 --- a/src/core/layout.cpp +++ b/src/core/layout.cpp @@ -2,6 +2,7 @@ // Copyright (c) 2024 Putup authors #include "pup/core/layout.hpp" +#include "pup/platform/file_io.hpp" #include #include @@ -14,34 +15,39 @@ auto const PUP_SOURCE_DIR_ENV = "PUP_SOURCE_DIR"; auto const PUP_CONFIG_DIR_ENV = "PUP_CONFIG_DIR"; auto const PUP_BUILD_DIR_ENV = 
"PUP_BUILD_DIR"; -auto get_env(char const* name) -> std::optional +auto get_env(char const* name) -> std::optional { if (auto const* value = std::getenv(name)) { if (*value != '\0') { - return std::filesystem::path { value }; + return std::string { value }; } } return std::nullopt; } auto find_build_subdir( - std::filesystem::path const& root -) -> std::optional + std::string const& root +) -> std::optional { for (auto const& name : { "build", "out", "variant" }) { - auto dir = std::filesystem::path { root / name }; - if (std::filesystem::exists(dir / "tup.config") - || std::filesystem::is_directory(dir / ".pup")) { + auto dir = path::join(root, name); + if (platform::exists(path::join(dir, "tup.config")) + || platform::is_directory(path::join(dir, ".pup"))) { return dir; } } - if (std::filesystem::is_directory(root)) { - for (auto const& entry : std::filesystem::directory_iterator(root)) { - if (entry.is_directory()) { - if (std::filesystem::exists(entry.path() / "tup.config") - || std::filesystem::is_directory(entry.path() / ".pup")) { - return entry.path(); + if (platform::is_directory(root)) { + auto entries = platform::read_directory(root); + if (entries) { + for (auto const& entry : *entries) { + if (!entry.is_dir) { + continue; + } + auto entry_path = path::join(root, entry.name); + if (platform::exists(path::join(entry_path, "tup.config")) + || platform::is_directory(path::join(entry_path, ".pup"))) { + return entry_path; } } } @@ -53,68 +59,60 @@ auto find_build_subdir( } // namespace auto find_project_root( - std::filesystem::path const& start_dir -) -> std::optional + std::string const& start_dir +) -> std::optional { - auto current = std::filesystem::path { start_dir }; - auto last_tupfile_dir = std::optional {}; + auto current = start_dir; + auto last_tupfile_dir = std::optional {}; while (true) { - // Tupfile.ini is the authoritative project root marker. 
- if (std::filesystem::exists(current / "Tupfile.ini")) { + if (platform::exists(path::join(current, "Tupfile.ini"))) { return current; } - // Track the topmost directory with a Tupfile (fallback for simple projects) - if (std::filesystem::exists(current / "Tupfile")) { + if (platform::exists(path::join(current, "Tupfile"))) { last_tupfile_dir = current; } - auto parent = std::filesystem::path { current.parent_path() }; - if (parent == current) { - // Reached filesystem root. Use the topmost Tupfile dir if found. + auto par = std::string { path::parent(current) }; + if (par == current || par.empty()) { return last_tupfile_dir; } - current = parent; + current = par; } } -/// Normalize a path: make absolute and resolve symlinks where possible. -/// Uses weakly_canonical which doesn't require the path to fully exist. -auto normalize_path(std::filesystem::path const& path) -> std::filesystem::path +auto normalize_path(std::string const& p) -> std::string { - auto ec = std::error_code {}; - auto result = std::filesystem::weakly_canonical(path, ec); - if (ec) { - return std::filesystem::absolute(path); + auto result = platform::canonical(p); + if (result) { + return *result; } - // Ensure result is absolute (weakly_canonical may return relative for non-existent paths) - if (!result.is_absolute()) { - result = std::filesystem::absolute(result); + auto abs = platform::absolute(p); + if (abs) { + return *abs; } - return result; + return p; } auto discover_layout(LayoutOptions const& opts) -> Result { auto layout = ProjectLayout {}; - auto cwd = std::filesystem::path { std::filesystem::current_path() }; + auto cwd = *platform::current_directory(); - // Step 1: Determine source_root - // Priority: CLI arg > env var > walk up from cwd if (opts.source_dir) { - if (!std::filesystem::exists(*opts.source_dir)) { + if (!platform::exists(*opts.source_dir)) { return make_error( ErrorCode::NotFound, - "Source directory not found: " + opts.source_dir->string() + "Source directory not 
found: " + *opts.source_dir ); } layout.source_root = normalize_path(*opts.source_dir); } else if (auto env_source = get_env(PUP_SOURCE_DIR_ENV)) { - if (!std::filesystem::exists(*env_source)) { + if (!platform::exists(*env_source)) { return make_error( ErrorCode::NotFound, - "PUP_SOURCE_DIR not found: " + env_source->string() + "PUP_SOURCE_DIR not found: " + *env_source ); } layout.source_root = normalize_path(*env_source); @@ -129,65 +127,51 @@ auto discover_layout(LayoutOptions const& opts) -> Result layout.source_root = normalize_path(*root); } - // Step 2: Determine output_root (where outputs/.pup/tup.config go) - // Priority: CLI arg > env var > cwd if has tup.config > variant subdir > source_root if (opts.build_dir) { layout.output_root = normalize_path(*opts.build_dir); } else if (auto env_build = get_env(PUP_BUILD_DIR_ENV)) { layout.output_root = normalize_path(*env_build); - } else if (std::filesystem::exists(cwd / "tup.config") && cwd != layout.source_root) { - // cwd contains tup.config and is not source_root - out-of-tree build + } else if (platform::exists(path::join(cwd, "tup.config")) && cwd != layout.source_root) { layout.output_root = normalize_path(cwd); } else if (auto build_subdir = find_build_subdir(layout.source_root)) { - // Found subdirectory with tup.config layout.output_root = normalize_path(*build_subdir); } else { - // In-tree build: outputs go to source_root layout.output_root = layout.source_root; } - // Step 3: Determine config_root (where Tupfiles live) - // Priority: CLI arg > env var > source if has Tupfile.ini > output if has Tupfile.ini > error if (opts.config_dir) { - if (!std::filesystem::exists(*opts.config_dir)) { + if (!platform::exists(*opts.config_dir)) { return make_error( ErrorCode::NotFound, - "Config directory not found: " + opts.config_dir->string() + "Config directory not found: " + *opts.config_dir ); } layout.config_root = normalize_path(*opts.config_dir); - // Validate config_root has Tupfile.ini - if 
(!std::filesystem::exists(layout.config_root / "Tupfile.ini")) { + if (!platform::exists(path::join(layout.config_root, "Tupfile.ini"))) { return make_error( ErrorCode::NotFound, - "Config directory does not contain Tupfile.ini: " + layout.config_root.string() + "Config directory does not contain Tupfile.ini: " + layout.config_root ); } } else if (auto env_config = get_env(PUP_CONFIG_DIR_ENV)) { - if (!std::filesystem::exists(*env_config)) { + if (!platform::exists(*env_config)) { return make_error( ErrorCode::NotFound, - "PUP_CONFIG_DIR not found: " + env_config->string() + "PUP_CONFIG_DIR not found: " + *env_config ); } layout.config_root = normalize_path(*env_config); - // Validate config_root has Tupfile.ini - if (!std::filesystem::exists(layout.config_root / "Tupfile.ini")) { + if (!platform::exists(path::join(layout.config_root, "Tupfile.ini"))) { return make_error( ErrorCode::NotFound, - "Config directory does not contain Tupfile.ini: " + layout.config_root.string() + "Config directory does not contain Tupfile.ini: " + layout.config_root ); } - } else if (std::filesystem::exists(layout.source_root / "Tupfile.ini")) { - // Traditional mode: Tupfiles alongside source (Tupfile.ini present) + } else if (platform::exists(path::join(layout.source_root, "Tupfile.ini"))) { layout.config_root = layout.source_root; - } else if (std::filesystem::exists(layout.output_root / "Tupfile.ini")) { - // Two-tree fallback: Tupfiles in output directory + } else if (platform::exists(path::join(layout.output_root, "Tupfile.ini"))) { layout.config_root = layout.output_root; } else { - // Final fallback: source_root is config_root (simple projects with just Tupfile) - // find_project_root() accepts projects with Tupfile even without Tupfile.ini, - // so we honor that by using source_root as config_root. 
layout.config_root = layout.source_root; } @@ -195,29 +179,31 @@ auto discover_layout(LayoutOptions const& opts) -> Result } auto discover_variants( - std::filesystem::path const& source_root -) -> std::vector + std::string const& source_root +) -> std::vector { - auto result = std::vector {}; + auto result = std::vector {}; + + if (!platform::is_directory(source_root)) { + return result; + } - if (!std::filesystem::is_directory(source_root)) { + auto entries = platform::read_directory(source_root); + if (!entries) { return result; } - auto ec = std::error_code {}; - for (auto const& entry : std::filesystem::directory_iterator(source_root, ec)) { - if (!entry.is_directory()) { + for (auto const& entry : *entries) { + if (!entry.is_dir) { continue; } - - auto const& path = entry.path(); - if (std::filesystem::exists(path / "tup.config") - || std::filesystem::is_directory(path / ".pup")) { - result.push_back(path.filename()); + auto entry_path = path::join(source_root, entry.name); + if (platform::exists(path::join(entry_path, "tup.config")) + || platform::is_directory(path::join(entry_path, ".pup"))) { + result.push_back(entry.name); } } - // Sort for deterministic order std::ranges::sort(result); return result; } diff --git a/src/core/path.cpp b/src/core/path.cpp new file mode 100644 index 0000000..edbe734 --- /dev/null +++ b/src/core/path.cpp @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "pup/core/path.hpp" + +#include + +namespace pup::path { + +auto join(std::string_view a, std::string_view b) -> std::string +{ + if (b.empty()) { + return std::string { a }; + } + if (a.empty() || is_absolute(b)) { + return std::string { b }; + } + + auto result = std::string { a }; + if (result.back() != '/') { + result += '/'; + } + result += b; + return result; +} + +auto parent(std::string_view p) -> std::string_view +{ + if (p.empty()) { + return {}; + } + + // Trim trailing slash (except root "/") + auto end = p.size(); 
+ while (end > 1 && p[end - 1] == '/') { + --end; + } + + auto pos = p.rfind('/', end - 1); + if (pos == std::string_view::npos) { + return {}; + } + if (pos == 0) { + return p.substr(0, 1); // "/" + } + return p.substr(0, pos); +} + +auto filename(std::string_view p) -> std::string_view +{ + if (p.empty()) { + return {}; + } + auto pos = p.rfind('/'); + if (pos == std::string_view::npos) { + return p; + } + return p.substr(pos + 1); +} + +auto stem(std::string_view p) -> std::string_view +{ + auto name = filename(p); + if (name.empty() || name == "." || name == "..") { + return name; + } + auto dot = name.rfind('.'); + if (dot == 0 || dot == std::string_view::npos) { + return name; + } + return name.substr(0, dot); +} + +auto extension(std::string_view p) -> std::string_view +{ + auto name = filename(p); + if (name.empty() || name == "." || name == "..") { + return {}; + } + auto dot = name.rfind('.'); + if (dot == 0 || dot == std::string_view::npos) { + return {}; + } + return name.substr(dot); +} + +auto is_absolute(std::string_view p) -> bool +{ + if (p.empty()) { + return false; + } + if (p[0] == '/') { + return true; + } +#ifdef _WIN32 + // Drive letter: C:/ or C:\. + if (p.size() >= 3 && p[1] == ':' && (p[2] == '/' || p[2] == '\\')) { + auto c = p[0]; + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } +#endif + return false; +} + +auto normalize(std::string_view p) -> std::string +{ + auto parts = std::vector {}; + auto start = std::size_t { 0 }; + auto absolute = is_absolute(p); + + while (start < p.size()) { + auto end = p.find('/', start); + if (end == std::string_view::npos) { + end = p.size(); + } + auto part = p.substr(start, end - start); + if (part.empty() || part == ".") { + // skip + } else if (part == ".." && !parts.empty() && parts.back() != "..") { + parts.pop_back(); + } else if (part == ".." 
&& absolute) { + // Cannot go above root — absorb + } else { + parts.push_back(part); + } + start = end + 1; + } + + if (parts.empty()) { + return absolute ? "/" : "."; + } + + auto result = std::string {}; + if (absolute) { + result = "/"; + } + for (std::size_t i = 0; i < parts.size(); ++i) { + if (i > 0 || absolute) { + if (!result.empty() && result.back() != '/') { + result += '/'; + } + } + result += parts[i]; + } + return result; +} + +auto relative(std::string_view target, std::string_view base) -> std::string +{ + if (target == base) { + return "."; + } + + // Split both paths into components + auto split = [](std::string_view p) { + auto parts = std::vector {}; + auto start = std::size_t { 0 }; + while (start < p.size()) { + auto end = p.find('/', start); + if (end == std::string_view::npos) { + end = p.size(); + } + auto part = p.substr(start, end - start); + if (!part.empty() && part != ".") { + parts.push_back(part); + } + start = end + 1; + } + return parts; + }; + + auto target_parts = split(target); + auto base_parts = split(base); + + // Find common prefix length + auto common = std::size_t { 0 }; + auto max_common = std::min(target_parts.size(), base_parts.size()); + while (common < max_common && target_parts[common] == base_parts[common]) { + ++common; + } + + auto result = std::string {}; + // Go up from base to common ancestor + for (auto i = common; i < base_parts.size(); ++i) { + if (!result.empty()) { + result += '/'; + } + result += ".."; + } + // Append remaining target path + for (auto i = common; i < target_parts.size(); ++i) { + if (!result.empty()) { + result += '/'; + } + result += target_parts[i]; + } + + return result.empty() ? "." 
: result; +} + +} // namespace pup::path diff --git a/src/core/path_utils.cpp b/src/core/path_utils.cpp index 0659574..0f50a5a 100644 --- a/src/core/path_utils.cpp +++ b/src/core/path_utils.cpp @@ -2,6 +2,7 @@ // Copyright (c) 2024 Putup authors #include "pup/core/path_utils.hpp" +#include "pup/core/path.hpp" #include #include @@ -9,55 +10,46 @@ namespace pup { auto is_path_under( - std::filesystem::path const& path, - std::filesystem::path const& root + std::string const& path_str, + std::string const& root ) -> bool { - auto path_str = path.generic_string(); - auto root_str = root.generic_string(); + auto root_str = root; - // Handle trailing slash while (!root_str.empty() && root_str.back() == '/') { root_str.pop_back(); } - // Exact match if (path_str == root_str) { return true; } - // Check prefix with directory boundary if (!path_str.starts_with(root_str)) { return false; } - // Ensure we match at directory boundary return path_str[root_str.size()] == '/'; } auto relative_to_root( - std::filesystem::path const& path, - std::filesystem::path const& root + std::string const& path_str, + std::string const& root ) -> std::string { - if (!is_path_under(path, root)) { + if (!is_path_under(path_str, root)) { return ""; } - auto path_str = path.generic_string(); - auto root_str = root.generic_string(); + auto root_str = root; - // Handle trailing slash while (!root_str.empty() && root_str.back() == '/') { root_str.pop_back(); } - // Exact match returns empty if (path_str == root_str) { return ""; } - // Skip root prefix and separator return path_str.substr(root_str.size() + 1); } @@ -70,7 +62,6 @@ auto is_path_in_scope( return true; } - // Strip trailing separators from scope while (!scope.empty() && (scope.back() == '/' || scope.back() == '\\')) { scope.remove_suffix(1); } @@ -87,7 +78,6 @@ auto is_path_in_scope( return true; } - // Ensure directory boundary auto const sep = path[scope.size()]; return sep == '/' || sep == '\\'; } @@ -127,64 +117,64 @@ auto 
compute_source_to_root(std::string_view source_dir) -> std::string } auto make_source_relative( - std::string_view path, + std::string_view path_sv, std::string_view source_to_root, std::string_view source_dir ) -> std::string { - if (path.empty() || path[0] == '/') { - return std::string { path }; + if (path_sv.empty() || path_sv[0] == '/') { + return std::string { path_sv }; } - if (path.size() >= 2 && path[0] == '.' && path[1] == '.') { + if (path_sv.size() >= 2 && path_sv[0] == '.' && path_sv[1] == '.') { if (!source_to_root.empty() && !source_dir.empty()) { - return std::string { source_to_root } + std::string { path }; + return std::string { source_to_root } + std::string { path_sv }; } - return std::string { path }; + return std::string { path_sv }; } if (source_to_root.empty()) { - return std::string { path }; + return std::string { path_sv }; } auto dir_prefix = std::string { source_dir } + "/"; - if (path.starts_with(dir_prefix)) { - return std::string { path.substr(dir_prefix.size()) }; + if (path_sv.starts_with(dir_prefix)) { + return std::string { path_sv.substr(dir_prefix.size()) }; } - if (path == source_dir) { + if (path_sv == source_dir) { return "."; } - return std::string { source_to_root } + std::string { path }; + return std::string { source_to_root } + std::string { path_sv }; } auto strip_path_prefix( - std::string_view path, + std::string_view path_sv, std::string_view prefix ) -> std::string { if (prefix.empty()) { - return std::string { path }; + return std::string { path_sv }; } auto prefix_with_slash = std::string { prefix } + "/"; - if (path.starts_with(prefix_with_slash)) { - return std::string { path.substr(prefix_with_slash.size()) }; + if (path_sv.starts_with(prefix_with_slash)) { + return std::string { path_sv.substr(prefix_with_slash.size()) }; } - return std::string { path }; + return std::string { path_sv }; } auto resolve_under_root( - std::string_view path, - std::filesystem::path const& source_root, - std::filesystem::path 
const& target_root + std::string_view path_sv, + std::string const& source_root, + std::string const& target_root ) -> std::optional { - if (!path.starts_with("..")) { + if (!path_sv.starts_with("..")) { return std::nullopt; } - auto abs_path = (source_root / path).lexically_normal(); - auto target_prefix = target_root.lexically_normal(); - auto rel = abs_path.lexically_relative(target_prefix); + auto abs_path = path::normalize(path::join(source_root, path_sv)); + auto target_prefix = path::normalize(target_root); + auto rel = path::relative(abs_path, target_prefix); - if (!rel.empty() && !rel.string().starts_with("..")) { - return rel.generic_string(); + if (!rel.empty() && !rel.starts_with("..")) { + return rel; } return std::nullopt; } diff --git a/src/core/sorted_id_vec.cpp b/src/core/sorted_id_vec.cpp new file mode 100644 index 0000000..cafece3 --- /dev/null +++ b/src/core/sorted_id_vec.cpp @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "pup/core/sorted_id_vec.hpp" + +#include +#include +#include + +namespace pup { + +// --- SortedIdVec --- + +SortedIdVec::~SortedIdVec() +{ + std::free(data_); +} + +SortedIdVec::SortedIdVec(SortedIdVec&& other) noexcept + : data_(std::exchange(other.data_, nullptr)) + , size_(std::exchange(other.size_, 0)) + , capacity_(std::exchange(other.capacity_, 0)) +{ +} + +auto SortedIdVec::operator=(SortedIdVec&& other) noexcept -> SortedIdVec& +{ + if (this != &other) { + std::free(data_); + data_ = std::exchange(other.data_, nullptr); + size_ = std::exchange(other.size_, 0); + capacity_ = std::exchange(other.capacity_, 0); + } + return *this; +} + +auto SortedIdVec::grow() -> void +{ + auto const new_cap = capacity_ == 0 ? 
std::size_t { 8 } : capacity_ * 2; + auto* p = static_cast(std::realloc(data_, new_cap * sizeof(std::uint32_t))); + if (!p) { + std::abort(); + } + data_ = p; + capacity_ = new_cap; +} + +auto SortedIdVec::lower_bound(std::uint32_t id) const -> std::size_t +{ + auto lo = std::size_t { 0 }; + auto hi = size_; + while (lo < hi) { + auto const mid = lo + (hi - lo) / 2; + if (data_[mid] < id) { + lo = mid + 1; + } else { + hi = mid; + } + } + return lo; +} + +auto SortedIdVec::insert(std::uint32_t id) -> bool +{ + auto const pos = lower_bound(id); + if (pos < size_ && data_[pos] == id) { + return false; + } + if (size_ == capacity_) { + grow(); + } + if (pos < size_) { + std::memmove(data_ + pos + 1, data_ + pos, (size_ - pos) * sizeof(std::uint32_t)); + } + data_[pos] = id; + ++size_; + return true; +} + +auto SortedIdVec::contains(std::uint32_t id) const -> bool +{ + auto const pos = lower_bound(id); + return pos < size_ && data_[pos] == id; +} + +auto SortedIdVec::remove(std::uint32_t id) -> bool +{ + auto const pos = lower_bound(id); + if (pos >= size_ || data_[pos] != id) { + return false; + } + if (pos + 1 < size_) { + std::memmove(data_ + pos, data_ + pos + 1, (size_ - pos - 1) * sizeof(std::uint32_t)); + } + --size_; + return true; +} + +auto SortedIdVec::clear() -> void +{ + size_ = 0; +} + +auto SortedIdVec::size() const -> std::size_t +{ + return size_; +} + +auto SortedIdVec::merge_from(SortedIdVec const& other) -> void +{ + auto const* d = other.data(); + for (auto i = std::size_t { 0 }; i < other.size(); ++i) { + insert(d[i]); + } +} + +auto SortedIdVec::data() const -> std::uint32_t const* +{ + return data_; +} + +auto SortedIdVec::for_each(void (*fn)(std::uint32_t id, void* ctx), void* ctx) const -> void +{ + for (auto i = std::size_t { 0 }; i < size_; ++i) { + fn(data_[i], ctx); + } +} + +// --- SortedPairVec --- + +SortedPairVec::~SortedPairVec() +{ + std::free(data_); +} + +SortedPairVec::SortedPairVec(SortedPairVec&& other) noexcept + : 
data_(std::exchange(other.data_, nullptr)) + , size_(std::exchange(other.size_, 0)) + , capacity_(std::exchange(other.capacity_, 0)) +{ +} + +auto SortedPairVec::operator=(SortedPairVec&& other) noexcept -> SortedPairVec& +{ + if (this != &other) { + std::free(data_); + data_ = std::exchange(other.data_, nullptr); + size_ = std::exchange(other.size_, 0); + capacity_ = std::exchange(other.capacity_, 0); + } + return *this; +} + +auto SortedPairVec::grow() -> void +{ + auto const new_cap = capacity_ == 0 ? std::size_t { 8 } : capacity_ * 2; + auto* p = static_cast(std::realloc(data_, new_cap * sizeof(Pair))); + if (!p) { + std::abort(); + } + data_ = p; + capacity_ = new_cap; +} + +auto SortedPairVec::lower_bound(std::uint32_t key) const -> std::size_t +{ + auto lo = std::size_t { 0 }; + auto hi = size_; + while (lo < hi) { + auto const mid = lo + (hi - lo) / 2; + if (data_[mid].key < key) { + lo = mid + 1; + } else { + hi = mid; + } + } + return lo; +} + +auto SortedPairVec::insert(std::uint32_t key, std::uint32_t value) -> bool +{ + auto const pos = lower_bound(key); + if (pos < size_ && data_[pos].key == key) { + data_[pos].value = value; + return false; + } + if (size_ == capacity_) { + grow(); + } + if (pos < size_) { + std::memmove(data_ + pos + 1, data_ + pos, (size_ - pos) * sizeof(Pair)); + } + data_[pos] = Pair { key, value }; + ++size_; + return true; +} + +auto SortedPairVec::find(std::uint32_t key) const -> std::uint32_t const* +{ + auto const pos = lower_bound(key); + if (pos >= size_ || data_[pos].key != key) { + return nullptr; + } + return &data_[pos].value; +} + +auto SortedPairVec::contains(std::uint32_t key) const -> bool +{ + auto const pos = lower_bound(key); + return pos < size_ && data_[pos].key == key; +} + +auto SortedPairVec::remove(std::uint32_t key) -> bool +{ + auto const pos = lower_bound(key); + if (pos >= size_ || data_[pos].key != key) { + return false; + } + if (pos + 1 < size_) { + std::memmove(data_ + pos, data_ + pos + 1, (size_ 
- pos - 1) * sizeof(Pair)); + } + --size_; + return true; +} + +auto SortedPairVec::clear() -> void +{ + size_ = 0; +} + +auto SortedPairVec::size() const -> std::size_t +{ + return size_; +} + +auto SortedPairVec::for_each(void (*fn)(std::uint32_t key, std::uint32_t value, void* ctx), void* ctx) const -> void +{ + for (auto i = std::size_t { 0 }; i < size_; ++i) { + fn(data_[i].key, data_[i].value, ctx); + } +} + +} // namespace pup diff --git a/src/core/string_pool.cpp b/src/core/string_pool.cpp index d9ed705..c1f14ee 100644 --- a/src/core/string_pool.cpp +++ b/src/core/string_pool.cpp @@ -3,12 +3,150 @@ #include "pup/core/string_pool.hpp" +#include +#include + namespace pup { +namespace { + +auto fnv1a(std::string_view s) -> std::uint32_t +{ + auto h = std::uint32_t { 2166136261u }; + for (auto c : s) { + h ^= static_cast(static_cast(c)); + h *= 16777619u; + } + return h; +} + +auto fix_hash(std::uint32_t h) -> std::uint32_t +{ + return h < 2 ? h + 2 : h; +} + +auto next_power_of_two(std::size_t n) -> std::size_t +{ + auto v = n - 1; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return v + 1; +} + +} // namespace + StringPool::StringPool() = default; -StringPool::~StringPool() = default; -StringPool::StringPool(StringPool&&) noexcept = default; -auto StringPool::operator=(StringPool&&) noexcept -> StringPool& = default; + +StringPool::~StringPool() +{ + std::free(meta_); + std::free(values_); +} + +StringPool::StringPool(StringPool&& other) noexcept + : storage_(std::move(other.storage_)) + , meta_(std::exchange(other.meta_, nullptr)) + , values_(std::exchange(other.values_, nullptr)) + , index_capacity_(std::exchange(other.index_capacity_, 0)) + , index_count_(std::exchange(other.index_count_, 0)) +{ +} + +auto StringPool::operator=(StringPool&& other) noexcept -> StringPool& +{ + if (this != &other) { + std::free(meta_); + std::free(values_); + + storage_ = std::move(other.storage_); + meta_ = 
std::exchange(other.meta_, nullptr); + values_ = std::exchange(other.values_, nullptr); + index_capacity_ = std::exchange(other.index_capacity_, 0); + index_count_ = std::exchange(other.index_count_, 0); + } + return *this; +} + +auto StringPool::key_at(std::size_t slot) const -> std::string_view +{ + return storage_[to_underlying(values_[slot]) - 1]; +} + +auto StringPool::probe_find(std::uint32_t h, std::string_view key) const -> StringId +{ + if (index_capacity_ == 0) { + return StringId::Empty; + } + + auto mask = index_capacity_ - 1; + auto slot = static_cast(h) & mask; + auto disp = std::uint16_t { 0 }; + + for (;;) { + if (meta_[slot].hash == 0) { + return StringId::Empty; + } + if (meta_[slot].displacement < disp) { + return StringId::Empty; + } + if (meta_[slot].hash == h && key_at(slot) == key) { + return values_[slot]; + } + ++disp; + slot = (slot + 1) & mask; + } +} + +auto StringPool::probe_insert(std::uint32_t h, StringId id) -> void +{ + auto mask = index_capacity_ - 1; + auto slot = static_cast(h) & mask; + auto disp = std::uint16_t { 0 }; + + for (;;) { + if (meta_[slot].hash == 0) { + meta_[slot] = { h, disp }; + values_[slot] = id; + return; + } + if (meta_[slot].displacement < disp) { + std::swap(h, meta_[slot].hash); + std::swap(disp, meta_[slot].displacement); + std::swap(id, values_[slot]); + } + ++disp; + slot = (slot + 1) & mask; + } +} + +auto StringPool::grow() -> void +{ + auto new_cap = index_capacity_ == 0 ? 
std::size_t { 16 } : index_capacity_ * 2; + + auto* old_meta = meta_; + auto* old_values = values_; + auto old_cap = index_capacity_; + + meta_ = static_cast(std::calloc(new_cap, sizeof(Meta))); + values_ = static_cast(std::malloc(new_cap * sizeof(StringId))); + if (!meta_ || !values_) { + std::abort(); + } + index_capacity_ = new_cap; + + for (auto i = std::size_t { 0 }; i < old_cap; ++i) { + if (old_meta[i].hash >= 2) { + probe_insert(old_meta[i].hash, old_values[i]); + } + } + + std::free(old_meta); + std::free(old_values); +} auto StringPool::intern(std::string_view str) -> StringId { @@ -16,15 +154,21 @@ auto StringPool::intern(std::string_view str) -> StringId return StringId::Empty; } - if (auto it = index_.find(str); it != index_.end()) { - return it->second; + auto h = fix_hash(fnv1a(str)); + + if (auto existing = probe_find(h, str); !is_empty(existing)) { + return existing; } auto const id = make_string_id(static_cast(storage_.size() + 1)); - storage_.emplace_back(str); - auto const& stored = storage_.back(); - index_.emplace(std::string_view { stored }, id); + + if (index_count_ >= index_capacity_ * 4 / 5) { + grow(); + } + + probe_insert(h, id); + ++index_count_; return id; } @@ -49,11 +193,7 @@ auto StringPool::find(std::string_view str) const -> StringId return StringId::Empty; } - if (auto it = index_.find(str); it != index_.end()) { - return it->second; - } - - return StringId::Empty; + return probe_find(fix_hash(fnv1a(str)), str); } auto StringPool::size() const -> std::size_t @@ -64,12 +204,42 @@ auto StringPool::size() const -> std::size_t auto StringPool::clear() -> void { storage_.clear(); - index_.clear(); + std::free(meta_); + std::free(values_); + meta_ = nullptr; + values_ = nullptr; + index_capacity_ = 0; + index_count_ = 0; } auto StringPool::reserve(std::size_t count) -> void { - index_.reserve(count); + auto needed = count * 5 / 4 + 1; + auto cap = next_power_of_two(needed < 16 ? 
16 : needed); + + if (cap <= index_capacity_) { + return; + } + + auto* old_meta = meta_; + auto* old_values = values_; + auto old_cap = index_capacity_; + + meta_ = static_cast(std::calloc(cap, sizeof(Meta))); + values_ = static_cast(std::malloc(cap * sizeof(StringId))); + if (!meta_ || !values_) { + std::abort(); + } + index_capacity_ = cap; + + for (auto i = std::size_t { 0 }; i < old_cap; ++i) { + if (old_meta[i].hash >= 2) { + probe_insert(old_meta[i].hash, old_values[i]); + } + } + + std::free(old_meta); + std::free(old_values); } } // namespace pup diff --git a/src/exec/progress_display.cpp b/src/exec/progress_display.cpp index 2ce906d..3374f2c 100644 --- a/src/exec/progress_display.cpp +++ b/src/exec/progress_display.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include namespace pup::exec { @@ -52,7 +52,7 @@ auto job_completed(ProgressState state, NodeId id, bool success) -> ProgressStat auto render_tty(ProgressState const& state, std::string_view variant) -> ProgressOutput { auto result = ProgressOutput {}; - auto out = std::ostringstream {}; + auto out = std::string {}; auto term_width = static_cast(pup::terminal_width()); @@ -70,54 +70,59 @@ auto render_tty(ProgressState const& state, std::string_view variant) -> Progres current_display = sorted.back().display; } - // Build progress line prefix to calculate available width - auto prefix = std::ostringstream {}; + auto prefix = std::string {}; if (!variant.empty()) { - prefix << "[" << variant << "] "; + prefix += '['; + prefix += variant; + prefix += "] "; } - prefix << "["; - if (pct < 10) { - prefix << " "; - } else if (pct < 100) { - prefix << " "; - } - prefix << pct << "% " << done << "/" << state.total << "] "; - auto prefix_str = prefix.str(); - auto path_width = term_width > prefix_str.size() ? 
term_width - prefix_str.size() : std::size_t { 20 }; - - out << prefix_str << truncate_left(current_display, path_width) << pup::ansi::clear_line; + char buf[64]; + std::snprintf(buf, sizeof(buf), "[%3zu%% %zu/%zu] ", pct, done, state.total); + prefix += buf; + auto path_width = term_width > prefix.size() ? term_width - prefix.size() : std::size_t { 20 }; + + out += prefix; + out += truncate_left(current_display, path_width); + out += pup::ansi::clear_line; result.line_count = 1; - // Exclude the last job (shown on progress line) from the running list auto list_size = sorted.size() > 1 ? sorted.size() - 1 : std::size_t { 0 }; auto max_jobs = std::min(list_size, MAX_RUNNING_JOBS_DISPLAY); - // Running jobs prefix: " M:SS " = 9 chars minimum (more for longer times) auto constexpr job_prefix_width = std::size_t { 9 }; auto job_path_width = term_width > job_prefix_width ? term_width - job_prefix_width : std::size_t { 20 }; for (std::size_t i = 0; i < max_jobs; ++i) { auto const& job = sorted[i]; auto elapsed = std::chrono::duration_cast(now - job.start_time); - out << "\n " << format_duration(elapsed) << " " << truncate_left(job.display, job_path_width) << pup::ansi::clear_line; + out += '\n'; + out += " "; + out += format_duration(elapsed); + out += ' '; + out += truncate_left(job.display, job_path_width); + out += pup::ansi::clear_line; ++result.line_count; } - result.text = out.str(); + result.text = std::move(out); return result; } auto render_simple(ProgressState const& state, std::string_view variant) -> std::string { - auto out = std::ostringstream {}; + auto out = std::string {}; auto done = state.completed + state.failed; if (!variant.empty()) { - out << "[" << variant << "] "; + out += '['; + out += variant; + out += "] "; } - out << "[" << done << "/" << state.total << "]"; + char buf[64]; + std::snprintf(buf, sizeof(buf), "[%zu/%zu]", done, state.total); + out += buf; - return out.str(); + return out; } auto format_duration(std::chrono::milliseconds ms) -> 
std::string diff --git a/src/exec/scheduler.cpp b/src/exec/scheduler.cpp index 37ad295..13fad97 100644 --- a/src/exec/scheduler.cpp +++ b/src/exec/scheduler.cpp @@ -3,10 +3,13 @@ #include "pup/exec/scheduler.hpp" #include "pup/core/metrics.hpp" +#include "pup/core/node_id_map.hpp" +#include "pup/core/path.hpp" #include "pup/graph/dag.hpp" #include "pup/graph/rule_pattern.hpp" #include "pup/graph/topo.hpp" #include "pup/parser/depfile.hpp" +#include "pup/platform/file_io.hpp" #include #include @@ -44,25 +47,25 @@ auto build_env_cache(std::vector const& jobs) -> EnvCache /// If the path is already prefixed with the variant output directory, use source_root as base. /// Otherwise, use output_root as base. auto resolve_variant_path( - std::filesystem::path const& source_root, - std::filesystem::path const& output_root, + std::string const& source_root, + std::string const& output_root, std::string_view output_root_prefix, - std::filesystem::path const& path -) -> std::filesystem::path + std::string const& path +) -> std::string { - auto path_str = path.generic_string(); + auto path_str = path; if (!output_root_prefix.empty() && path_str.starts_with(output_root_prefix) && (path_str.size() == output_root_prefix.size() || path_str[output_root_prefix.size()] == '/')) { - return source_root / path; + return pup::path::join(source_root, path); } - return output_root / path; + return pup::path::join(output_root, path); } /// Add job dependencies for any command that produces the given node. /// For phi-nodes (multiple producers), only add active producers when consumer is active. 
auto add_producer_dependencies( graph::BuildGraph const& graph, - std::unordered_map const& cmd_to_job, + NodeIdMap32 const& cmd_to_job, std::vector const& jobs, NodeId node_id, std::size_t current_job, @@ -72,11 +75,11 @@ auto add_producer_dependencies( auto current_active = jobs[current_job].guard_active; for (auto producer_id : graph.get_inputs(node_id)) { - if (node_id::is_command(producer_id)) { - if (auto it = cmd_to_job.find(producer_id); it != cmd_to_job.end() && it->second != current_job) { - // Only add dependency if: current is inactive OR producer is active - if (!current_active || jobs[it->second].guard_active) { - dependencies.insert(it->second); + if (node_id::is_command(producer_id) && cmd_to_job.contains(producer_id)) { + auto dep_idx = static_cast(cmd_to_job.get(producer_id)); + if (dep_idx != current_job) { + if (!current_active || jobs[dep_idx].guard_active) { + dependencies.insert(dep_idx); } } } @@ -97,9 +100,9 @@ auto build_dependency_map( auto dependents = std::vector>(jobs.size()); // Build map from command NodeId -> job index - auto cmd_to_job = std::unordered_map {}; + auto cmd_to_job = NodeIdMap32 {}; for (auto i = std::size_t { 0 }; i < jobs.size(); ++i) { - cmd_to_job[jobs[i].id] = i; + cmd_to_job.set(jobs[i].id, static_cast(i)); } // For each job, find dependencies via input edges @@ -113,10 +116,12 @@ auto build_dependency_map( for (auto input_id : graph.get_inputs(cmd_id)) { // Case 1: Input itself is a command (e.g., generated dep-scan rule) if (node_id::is_command(input_id)) { - if (auto it = cmd_to_job.find(input_id); it != cmd_to_job.end() && it->second != j) { - // Apply guard filtering: skip inactive producers when current job is active - if (!current_active || jobs[it->second].guard_active) { - dependencies.insert(it->second); + if (cmd_to_job.contains(input_id)) { + auto dep_idx = static_cast(cmd_to_job.get(input_id)); + if (dep_idx != j) { + if (!current_active || jobs[dep_idx].guard_active) { + 
dependencies.insert(dep_idx); + } } } continue; @@ -184,10 +189,10 @@ auto validate_guard_dependencies( auto collect_required_commands( graph::BuildGraph const& graph, std::vector const& target_ids -) -> std::set +) -> NodeIdMap32 { - auto visited = std::set {}; - auto commands = std::set {}; + auto visited = NodeIdMap32 {}; + auto commands = NodeIdMap32 {}; auto stack = std::vector(target_ids.begin(), target_ids.end()); while (!stack.empty()) { @@ -197,10 +202,10 @@ auto collect_required_commands( if (visited.contains(id)) { continue; } - visited.insert(id); + visited.set(id, 1); if (node_id::is_command(id) && graph.get_command_node(id)) { - commands.insert(id); + commands.set(id, 1); } for (auto input_id : graph.get_inputs(id)) { @@ -225,6 +230,18 @@ struct Scheduler::Impl { JobStartCallback on_start; JobCompleteCallback on_complete; ProgressCallback on_progress; + + auto execute_sequential( + std::vector const& jobs, + graph::BuildGraph const& graph, + EnvCache const& env_cache + ) -> Result; + + auto execute_job( + BuildJob const& job, + CommandRunner& runner, + EnvCache const& env_cache + ) -> JobResult; }; Scheduler::Scheduler(SchedulerOptions options) @@ -326,19 +343,26 @@ auto Scheduler::build_incremental( } // Find all nodes affected by changes - auto affected = std::set {}; + auto affected = NodeIdMap32 {}; + auto affected_vec = std::vector {}; for (auto const& file_path : changed_files) { auto it = path_to_id.find(file_path); if (it != path_to_id.end()) { auto id = it->second; - affected.insert(id); + if (!affected.contains(id)) { + affected.set(id, 1); + affected_vec.push_back(id); + } // For generated files that are missing/changed, also mark the producing command auto const* node = graph.get_file_node(id); if (node && node->type == NodeType::Generated) { for (auto input_id : graph.get_inputs(id)) { - affected.insert(input_id); + if (!affected.contains(input_id)) { + affected.set(input_id, 1); + affected_vec.push_back(input_id); + } } } } @@ -347,20 
+371,22 @@ auto Scheduler::build_incremental( // Expand to include all dependent commands (including order-only) // get_outputs() excludes sticky edges by design (Tupfile/config dependencies // are parse-time deps, not build-time deps) - auto to_process = std::vector(affected.begin(), affected.end()); + auto to_process = std::vector(affected_vec.begin(), affected_vec.end()); while (!to_process.empty()) { auto id = NodeId { to_process.back() }; to_process.pop_back(); for (auto dep_id : graph.get_outputs(id)) { - if (affected.insert(dep_id).second) { + if (!affected.contains(dep_id)) { + affected.set(dep_id, 1); to_process.push_back(dep_id); } } for (auto dep_id : graph.get_order_only_dependents(id)) { - if (affected.insert(dep_id).second) { + if (!affected.contains(dep_id)) { + affected.set(dep_id, 1); to_process.push_back(dep_id); } } @@ -388,18 +414,18 @@ auto Scheduler::build_incremental( return impl_->stats; } -auto Scheduler::execute_sequential( +auto Scheduler::Impl::execute_sequential( std::vector const& jobs, graph::BuildGraph const& graph, - std::unordered_map const& env_cache + EnvCache const& env_cache ) -> Result { auto runner = CommandRunner {}; - if (!impl_->options.source_root.empty()) { - runner.set_working_dir(impl_->options.source_root); + if (!options.source_root.empty()) { + runner.set_working_dir(options.source_root); } - if (impl_->options.timeout) { - runner.set_timeout(*impl_->options.timeout); // NOLINT(bugprone-unchecked-optional-access) + if (options.timeout) { + runner.set_timeout(*options.timeout); // NOLINT(bugprone-unchecked-optional-access) } auto [in_degree, dependents] = build_dependency_map(jobs, graph); @@ -411,7 +437,7 @@ auto Scheduler::execute_sequential( // Count inactive jobs upfront (they never enter the queue) auto inactive_count = std::count_if(jobs.begin(), jobs.end(), [](auto const& j) { return !j.guard_active; }); - impl_->stats.skipped_jobs += static_cast(inactive_count); + stats.skipped_jobs += 
static_cast(inactive_count); // Only queue active jobs with no dependencies auto ready_queue = std::queue {}; @@ -422,7 +448,7 @@ auto Scheduler::execute_sequential( } while (!ready_queue.empty()) { - if (impl_->cancelled.load()) { + if (cancelled.load()) { break; } @@ -430,34 +456,34 @@ auto Scheduler::execute_sequential( ready_queue.pop(); auto const& job = jobs[job_idx]; - if (impl_->on_start) { - impl_->on_start(job); + if (on_start) { + on_start(job); } auto result = JobResult { execute_job(job, runner, env_cache) }; - if (impl_->on_complete) { - impl_->on_complete(job, result); + if (on_complete) { + on_complete(job, result); } - impl_->stats.build_time += result.duration; + stats.build_time += result.duration; if (result.success) { - ++impl_->stats.completed_jobs; + ++stats.completed_jobs; for (auto dep_idx : dependents[job_idx]) { if (--in_degree[dep_idx] == 0 && jobs[dep_idx].guard_active) { ready_queue.push(dep_idx); } } } else { - ++impl_->stats.failed_jobs; - if (!impl_->options.keep_going) { + ++stats.failed_jobs; + if (!options.keep_going) { return make_error(ErrorCode::CommandFailed, "Command failed"); } } - if (impl_->on_progress) { - impl_->on_progress(impl_->stats.completed_jobs + impl_->stats.failed_jobs, impl_->stats.total_jobs); + if (on_progress) { + on_progress(stats.completed_jobs + stats.failed_jobs, stats.total_jobs); } } @@ -472,7 +498,7 @@ auto Scheduler::execute_parallel( auto const env_cache = build_env_cache(jobs); if (impl_->options.jobs == 1 || jobs.size() == 1) { - return execute_sequential(jobs, graph, env_cache); + return impl_->execute_sequential(jobs, graph, env_cache); } // Parallel execution with dependency-aware ready queue @@ -547,7 +573,7 @@ auto Scheduler::execute_parallel( impl_->on_start(job); } - auto result = JobResult { execute_job(job, runner, env_cache) }; + auto result = JobResult { impl_->execute_job(job, runner, env_cache) }; { auto lock = std::lock_guard { mutex }; @@ -626,10 +652,10 @@ auto 
Scheduler::execute_parallel( return {}; } -auto Scheduler::execute_job( +auto Scheduler::Impl::execute_job( BuildJob const& job, CommandRunner& runner, - std::unordered_map const& env_cache + EnvCache const& env_cache ) -> JobResult { auto result = JobResult { @@ -640,7 +666,7 @@ auto Scheduler::execute_job( .duration = {}, }; - if (impl_->options.dry_run) { + if (options.dry_run) { result.success = true; return result; } @@ -651,21 +677,20 @@ auto Scheduler::execute_job( // - Absolute: use as-is // - Already variant-mapped (starts with relative output_root prefix): use source_root as base // - Source-relative: prepend output_root - auto source_root = impl_->options.source_root; - auto relative_output_root = std::filesystem::relative( - impl_->options.output_root, + auto source_root = options.source_root; + auto relative_output_root = pup::path::relative( + options.output_root, source_root ); - auto output_root_prefix = relative_output_root.generic_string(); + auto output_root_prefix = relative_output_root; for (auto const& output : job.outputs) { - auto output_path = std::filesystem::path { output }; - if (!output_path.is_absolute()) { - output_path = resolve_variant_path(source_root, impl_->options.output_root, output_root_prefix, output); + auto output_path = std::string { output }; + if (!pup::path::is_absolute(output_path)) { + output_path = resolve_variant_path(source_root, options.output_root, output_root_prefix, output); } - auto parent = output_path.parent_path(); + auto parent = std::string { pup::path::parent(output_path) }; if (!parent.empty()) { - auto ec = std::error_code {}; - std::filesystem::create_directories(parent, ec); + (void)pup::platform::create_directories(parent); } } @@ -717,8 +742,8 @@ auto Scheduler::execute_job( // Traditional .d file discovery for (auto const& output : job.outputs) { - auto output_path = std::filesystem::path { output }; - auto ext = output_path.extension().string(); + auto output_path = std::string { output }; + auto 
ext = pup::path::extension(output_path); // Support common object file extensions (.o on Unix, .obj on Windows) if (ext != ".o" && ext != ".obj") { @@ -726,10 +751,10 @@ auto Scheduler::execute_job( } // Compute filesystem path for the .d file - auto base_path = resolve_variant_path(source_root, impl_->options.output_root, output_root_prefix, output_path.parent_path()); - auto depfile_path = base_path / (output_path.stem().string() + ".d"); + auto base_path = resolve_variant_path(source_root, options.output_root, output_root_prefix, std::string { pup::path::parent(output_path) }); + auto depfile_path = pup::path::join(base_path, std::string { pup::path::stem(output_path) } + ".d"); - if (!std::filesystem::exists(depfile_path)) { + if (!pup::platform::exists(depfile_path)) { continue; } @@ -768,15 +793,15 @@ auto Scheduler::build_job_list( auto path = graph.get_full_path(id); // Check if file exists - if so, it's a valid input (not missing) auto build_root_name = std::string { graph.get_build_root_name() }; - auto file_path = impl_->options.output_root / path; + auto file_path = pup::path::join(impl_->options.output_root, path); // Strip build prefix from path if present (for consistent file lookup) auto lookup_path = path; auto build_prefix = build_root_name + "/"; if (!build_root_name.empty() && path.starts_with(build_prefix)) { lookup_path = path.substr(build_prefix.size()); - file_path = impl_->options.output_root / lookup_path; + file_path = pup::path::join(impl_->options.output_root, lookup_path); } - if (std::filesystem::exists(file_path)) { + if (pup::platform::exists(file_path)) { continue; // File exists, not a missing input } return make_error>( @@ -804,9 +829,9 @@ auto Scheduler::build_job_list( // and TUP_VARIANT_OUTPUTDIR work correctly. Output paths are already // mapped to the output directory by the builder. 
auto source_dir = get_source_dir(graph.graph(), id); - auto working_dir = std::filesystem::path { impl_->options.source_root }; + auto working_dir = std::string { impl_->options.source_root }; if (!source_dir.empty()) { - working_dir /= source_dir; + working_dir = pup::path::join(working_dir, std::string { source_dir }); } // Check if this is a generated rule that captures stdout @@ -825,10 +850,10 @@ auto Scheduler::build_job_list( auto cmd_str = expand_instruction(graph.graph(), id, cache, impl_->options.source_root, impl_->options.config_root); auto display_str = std::string { get_display_str(graph.graph(), id) }; - // Convert exported_vars from StringIds to strings - auto exported_str = std::set {}; - for (auto var_id : node->exported_vars) { - exported_str.insert(std::string { graph.graph().strings.get(var_id) }); + auto exported_str = std::vector {}; + exported_str.reserve(node->exported_vars.size()); + for (auto raw_id : node->exported_vars) { + exported_str.emplace_back(graph.graph().strings.get(make_string_id(raw_id))); } // Evaluate guards - command only executes if ALL guards are satisfied @@ -900,7 +925,7 @@ auto Scheduler::build_job_list( auto Scheduler::build_subset( graph::BuildGraph const& graph, - std::set const& command_ids + NodeIdMap32 const& command_ids ) -> Result { auto start_time = std::chrono::steady_clock::time_point { std::chrono::steady_clock::now() }; @@ -949,7 +974,7 @@ auto Scheduler::build_targets( impl_->stats = BuildStats {}; // Collect all commands needed to build these targets via reverse traversal - auto required_cmds = std::set { collect_required_commands(graph, target_ids) }; + auto required_cmds = collect_required_commands(graph, target_ids); // Build all jobs, then filter to required commands auto all_jobs = Result> { build_job_list(graph) }; @@ -985,7 +1010,7 @@ auto Scheduler::build_targets( auto Scheduler::filter_jobs( std::vector const& all_jobs, - std::set const& affected_nodes + NodeIdMap32 const& affected_nodes ) -> 
std::vector { auto result = std::vector {}; diff --git a/src/graph/builder.cpp b/src/graph/builder.cpp index 6dfd2ab..a796680 100644 --- a/src/graph/builder.cpp +++ b/src/graph/builder.cpp @@ -3,6 +3,7 @@ #include "pup/graph/builder.hpp" #include "pup/core/hash.hpp" +#include "pup/core/node_id_map.hpp" #include "pup/core/path_utils.hpp" #include "pup/graph/dep_scanner.hpp" #include "pup/graph/rule_pattern.hpp" @@ -10,23 +11,27 @@ #include "pup/parser/glob.hpp" #include "pup/parser/parser.hpp" +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" + #include #include #include -#include #include -#include -#include -#include -#include namespace pup::graph { -namespace fs = std::filesystem; - namespace { +auto sorted_insert(std::vector& v, std::string const& key) -> void +{ + auto pos = std::lower_bound(v.begin(), v.end(), key); + if (pos == v.end() || *pos != key) { + v.insert(pos, key); + } +} + /// Strip trailing slashes from a path string auto strip_trailing_slashes(std::string str) -> std::string { @@ -44,8 +49,7 @@ auto normalize_path(std::string const& path_str) -> std::string if (path_str.empty()) { return path_str; } - auto path = fs::path { path_str }.lexically_normal(); - return path.generic_string(); + return pup::path::normalize(path_str); } /// Normalize a directory path for group key lookup. @@ -54,28 +58,28 @@ auto normalize_path(std::string const& path_str) -> std::string /// - Resolves parent references (..) 
against current_dir auto normalize_group_dir( std::string const& path_str, - fs::path const& current_dir, - fs::path const& source_root + std::string const& current_dir, + std::string const& source_root ) -> std::string { auto cleaned = strip_trailing_slashes(path_str); - auto raw_path = fs::path { cleaned }; - auto path = raw_path.lexically_normal(); + if (cleaned.empty()) { + return "."; + } + + auto normalized = pup::path::normalize(cleaned); - if (path.is_absolute()) { - path = fs::relative(path, source_root); - } else if (!current_dir.empty() && !path.empty()) { - // Check the pre-normalization first component to distinguish: - // "./" prefix (e.g. $(S)/foo/) → root-relative, do NOT combine - // "../" prefix → parent traversal, combine with current_dir - // bare name (e.g. mpn/) → subdirectory reference, combine with current_dir - auto first = *raw_path.begin(); - if (first != ".") { - path = (current_dir / path).lexically_normal(); + if (pup::path::is_absolute(normalized)) { + normalized = pup::path::relative(normalized, source_root); + } else if (!current_dir.empty() && normalized != ".") { + auto first_slash = cleaned.find('/'); + auto first_component = (first_slash == std::string::npos) ? cleaned : cleaned.substr(0, first_slash); + if (first_component != ".") { + normalized = pup::path::normalize(pup::path::join(current_dir, normalized)); } } - return path.empty() ? "." : path.generic_string(); + return (normalized.empty() || normalized == ".") ? "." : normalized; } /// Parsed group reference from a path like "../include/" @@ -96,11 +100,11 @@ auto is_order_only_group_reference(std::string_view path) -> bool /// This is used when referencing cross-directory groups or generated files. 
auto request_demand_driven_parse( parser::EvalContext const& eval, - fs::path const& dir_path + std::string const& dir_path ) -> void { if (eval.request_directory && eval.available_tupfile_dirs) { - if (eval.available_tupfile_dirs->contains(dir_path)) { + if (std::binary_search(eval.available_tupfile_dirs->begin(), eval.available_tupfile_dirs->end(), dir_path)) { (void)eval.request_directory(dir_path); } } @@ -110,8 +114,8 @@ auto request_demand_driven_parse( /// Returns nullopt if the path doesn't contain a valid suffix auto parse_group_reference( std::string const& path, - fs::path const& current_dir, - fs::path const& source_root + std::string const& current_dir, + std::string const& source_root ) -> std::optional { auto lt_pos = path.rfind('<'); @@ -136,8 +140,8 @@ auto parse_group_reference( /// by checking if they point to output_root and returning the relative path within it. auto normalize_to_output_relative( std::string_view path, - fs::path const& source_root, - fs::path const& output_root + std::string const& source_root, + std::string const& output_root ) -> std::string { if (auto resolved = pup::resolve_under_root(path, source_root, output_root)) { @@ -156,23 +160,26 @@ auto normalize_to_output_relative( struct PathTransformContext { std::string source_to_root; std::string current_dir_str; - fs::path source_root; - fs::path config_root; - fs::path output_root; - fs::path canonical_cwd; // Canonical source CWD for symlink-safe path resolution + std::string source_root; + std::string config_root; + std::string output_root; + std::string canonical_cwd; }; auto make_transform_context(BuilderContext const& ctx) -> PathTransformContext { - auto canonical_cwd = fs::path {}; + auto canonical_cwd = std::string {}; if (!ctx.options.source_root.empty() && !ctx.options.output_root.empty() && ctx.options.source_root != ctx.options.output_root) { - canonical_cwd = fs::weakly_canonical(ctx.options.source_root / ctx.current_dir); + auto r = 
pup::platform::canonical(pup::path::join(ctx.options.source_root, ctx.current_dir)); + if (r) { + canonical_cwd = *r; + } } return PathTransformContext { - .source_to_root = pup::compute_source_to_root(ctx.current_dir.generic_string()), - .current_dir_str = ctx.current_dir.generic_string(), + .source_to_root = pup::compute_source_to_root(ctx.current_dir), + .current_dir_str = ctx.current_dir, .source_root = ctx.options.source_root, .config_root = ctx.options.config_root, .output_root = ctx.options.output_root, @@ -185,8 +192,12 @@ auto make_transform_context(BuilderContext const& ctx) -> PathTransformContext /// navigate correctly from the physical CWD (which the OS uses after resolving symlinks). auto make_canonical_relative(PathTransformContext const& tc, std::string const& path) -> std::string { - auto abs = fs::weakly_canonical(tc.source_root / path); - return abs.lexically_relative(tc.canonical_cwd).generic_string(); + auto joined = pup::path::join(tc.source_root, path); + auto abs = pup::platform::canonical(joined); + if (abs) { + return pup::path::relative(*abs, tc.canonical_cwd); + } + return pup::path::relative(pup::path::normalize(joined), tc.canonical_cwd); } /// Transform an input path to Tupfile-relative for command expansion. @@ -214,8 +225,8 @@ auto transform_input_path( // Check if file exists in build directory - if so, use variant-prefixed path. auto build_root_name = std::string { graph.get_build_root_name() }; if (!build_root_name.empty()) { - auto build_path = tc.output_root / inp; - if (fs::exists(build_path)) { + auto build_path = pup::path::join(tc.output_root, inp); + if (pup::platform::exists(build_path)) { auto full_path = build_root_name + "/" + inp; if (!tc.canonical_cwd.empty() && full_path.starts_with("..")) { return make_canonical_relative(tc, full_path); @@ -227,12 +238,14 @@ auto transform_input_path( // In 3-tree builds, files may live in config_root rather than source_root. 
// Compute a path from the source cwd (where the command runs) to the config_root location. if (!tc.config_root.empty() && tc.config_root != tc.source_root) { - auto config_path = tc.config_root / inp; - if (fs::exists(config_path)) { - auto source_cwd = tc.source_root / tc.current_dir_str; - auto canonical_source = fs::weakly_canonical(source_cwd); - auto canonical_config = fs::weakly_canonical(config_path); - return canonical_config.lexically_relative(canonical_source).generic_string(); + auto config_path = pup::path::join(tc.config_root, inp); + if (pup::platform::exists(config_path)) { + auto source_cwd = pup::path::join(tc.source_root, tc.current_dir_str); + auto canonical_source = pup::platform::canonical(source_cwd); + auto canonical_config = pup::platform::canonical(config_path); + if (canonical_source && canonical_config) { + return pup::path::relative(*canonical_config, *canonical_source); + } } } @@ -292,10 +305,12 @@ auto walk_path_to_directory( } auto current_id = start_dir_id; - auto p = fs::path { path }; + auto remaining = path; - for (auto const& component : p) { - auto comp_str = component.string(); + while (!remaining.empty()) { + auto slash = remaining.find('/'); + auto comp_str = std::string { slash == std::string_view::npos ? remaining : remaining.substr(0, slash) }; + remaining = (slash == std::string_view::npos) ? 
std::string_view {} : remaining.substr(slash + 1); if (comp_str.empty() || comp_str == ".") { continue; } @@ -349,15 +364,12 @@ auto walk_to_file_node( return make_error(ErrorCode::InvalidArgument, "Empty path"); } - // Walk the path components - auto p = fs::path { path }; - auto parent_path = p.parent_path(); - auto basename = p.filename().string(); + auto par = pup::path::parent(path); + auto basename = std::string { pup::path::filename(path) }; - // Walk to parent directory auto target_dir_id = start_id; - if (!parent_path.empty() && parent_path != ".") { - target_dir_id = walk_path_to_directory(graph, start_id, parent_path.generic_string()); + if (!par.empty() && par != ".") { + target_dir_id = walk_path_to_directory(graph, start_id, par); } // Find or create the file node @@ -523,7 +535,7 @@ auto expand_command( auto get_or_create_directory_node( BuilderContext& ctx, - std::filesystem::path const& dir_path, + std::string const& dir_path, int depth = 0 ) -> Result; @@ -556,28 +568,31 @@ auto create_command_node( /// Per tup semantics, include_rules includes every Tuprules.tup from the /// project root down to the current directory. Gaps are allowed. 
auto find_tuprules_files( - fs::path const& start_dir, - fs::path const& root -) -> std::vector + std::string const& start_dir, + std::string const& root +) -> std::vector { - auto dirs = std::vector {}; + auto dirs = std::vector {}; auto search_dir = start_dir; - while (search_dir >= root) { + while (search_dir.size() >= root.size()) { dirs.push_back(search_dir); if (search_dir == root) { break; } - search_dir = search_dir.parent_path(); + auto par = std::string { pup::path::parent(search_dir) }; + if (par == search_dir || par.empty()) { + break; + } + search_dir = par; } - // Reverse to root-first order std::reverse(dirs.begin(), dirs.end()); - auto results = std::vector {}; + auto results = std::vector {}; for (auto const& dir : dirs) { - auto tuprules = dir / "Tuprules.tup"; - if (fs::exists(tuprules)) { + auto tuprules = pup::path::join(dir, "Tuprules.tup"); + if (pup::platform::exists(tuprules)) { results.push_back(tuprules); } } @@ -589,18 +604,18 @@ auto find_tuprules_files( /// Returns the resolved path or an error auto resolve_include_path( BuilderContext& ctx, - fs::path const& include_root, + std::string const& include_root, parser::Expression const& path_expr -) -> Result +) -> Result { auto path_result = parser::expand(*ctx.eval, path_expr); if (!path_result) { return pup::unexpected(path_result.error()); } - auto resolved = fs::path { include_root / ctx.current_dir / *path_result }; - if (!fs::exists(resolved)) { - return make_error(ErrorCode::IncludeNotFound, "Include file not found: " + *path_result); + auto resolved = pup::path::join(pup::path::join(include_root, ctx.current_dir), *path_result); + if (!pup::platform::exists(resolved)) { + return make_error(ErrorCode::IncludeNotFound, "Include file not found: " + *path_result); } return resolved; } @@ -613,8 +628,8 @@ auto expand_glob_pattern( std::vector& result ) -> void { - auto base = fs::path { ctx.current_dir.empty() ? 
ctx.options.source_root - : ctx.options.source_root / ctx.current_dir }; + auto base = ctx.current_dir.empty() ? ctx.options.source_root + : pup::path::join(ctx.options.source_root, ctx.current_dir); // First try expanding against filesystem auto expanded = parser::glob_expand(path, base); @@ -622,7 +637,7 @@ auto expand_glob_pattern( for (auto& p : *expanded) { // Prefix with current_dir to make path relative to project root if (!ctx.current_dir.empty()) { - result.push_back((ctx.current_dir / p).generic_string()); + result.push_back(pup::path::join(ctx.current_dir, p)); } else { result.push_back(std::move(p)); } @@ -632,15 +647,15 @@ auto expand_glob_pattern( // No files on disk - look for matching Generated nodes in graph // First, try demand-driven parsing of the directory containing the glob pattern - auto pattern_dir = fs::path { path }.parent_path(); - auto abs_pattern_dir = (ctx.current_dir / pattern_dir).lexically_normal(); + auto pattern_dir = std::string { pup::path::parent(path) }; + auto abs_pattern_dir = pup::path::normalize(pup::path::join(ctx.current_dir, pattern_dir)); request_demand_driven_parse(*ctx.eval, abs_pattern_dir); // Match glob pattern against Generated nodes // In 3-tree builds, Generated nodes are stored with build root prefix (e.g., ../build/hello.o) // but the glob pattern is relative to current directory (e.g., *.o) // We need to strip the build root prefix and match against the relative path - auto pattern_path = ctx.current_dir.empty() ? path : (ctx.current_dir / path).lexically_normal().generic_string(); + auto pattern_path = ctx.current_dir.empty() ? 
path : pup::path::normalize(pup::path::join(ctx.current_dir, path)); auto glob = parser::Glob { pattern_path }; auto build_root_name = ctx.graph->get_build_root_name(); for (auto id : ctx.graph->nodes_of_type(NodeType::Generated)) { @@ -676,21 +691,21 @@ auto apply_exclusions( for (auto const& excl : *paths) { if (ctx.options.expand_globs && parser::has_glob_chars(excl)) { - auto base = fs::path { ctx.current_dir.empty() ? ctx.options.source_root - : ctx.options.source_root / ctx.current_dir }; + auto base = ctx.current_dir.empty() ? ctx.options.source_root + : pup::path::join(ctx.options.source_root, ctx.current_dir); auto expanded = parser::glob_expand(excl, base); if (expanded && !expanded->empty()) { for (auto const& p : *expanded) { auto normalized = ctx.current_dir.empty() - ? fs::path { p }.lexically_normal().generic_string() - : (ctx.current_dir / p).lexically_normal().generic_string(); + ? pup::path::normalize(p) + : pup::path::normalize(pup::path::join(ctx.current_dir, p)); std::erase(result, normalized); } } } else { auto normalized_excl = ctx.current_dir.empty() - ? fs::path { excl }.lexically_normal().generic_string() - : (ctx.current_dir / excl).lexically_normal().generic_string(); + ? 
pup::path::normalize(excl) + : pup::path::normalize(pup::path::join(ctx.current_dir, excl)); std::erase(result, normalized_excl); } } @@ -730,7 +745,11 @@ auto process_generated_rules( // This is a group reference - get/create group node and defer edge auto group_id_result = get_or_create_group_node(ctx, state, group_ref->group_dir, group_ref->group_name); if (group_id_result) { - state.deferred_edges.insert({ *group_id_result, *gen_cmd_id }); + auto edge = DeferredOrderOnlyEdge { *group_id_result, *gen_cmd_id }; + auto pos = std::lower_bound(state.deferred_edges.begin(), state.deferred_edges.end(), edge); + if (pos == state.deferred_edges.end() || !(*pos == edge)) { + state.deferred_edges.insert(pos, edge); + } } } else if (!parser::has_glob_chars(oi)) { // Regular file path - create edge directly (skip glob patterns) @@ -785,8 +804,9 @@ auto lookup_bang_macro( ? cmd_str.substr(1) : cmd_str.substr(1, name_end - 1); - auto it = ctx.macros.find(macro_name); - if (it == ctx.macros.end()) { + auto key = to_underlying(ctx.graph->intern(macro_name)); + auto it = std::lower_bound(ctx.macros.begin(), ctx.macros.end(), key, [](auto const& p, auto k) { return p.first < k; }); + if (it == ctx.macros.end() || it->first != key) { return make_error(ErrorCode::UnknownMacro, "Unknown bang macro: !" 
+ macro_name); } @@ -846,12 +866,12 @@ auto apply_pending_weak_assignments(BuilderContext& ctx, BuilderState& state) -> ++it) { if (!ctx.vars->contains(it->name)) { ctx.vars->set(it->name, it->value); - // Record transitive dependencies for this effective assignment + auto name_id = ctx.graph->intern(it->name); if (!it->config_deps.empty()) { - state.var_config_deps[it->name] = std::move(it->config_deps); + state.var_config_deps.get_or_create(name_id) = std::move(it->config_deps); } if (!it->env_deps.empty()) { - state.var_env_deps[it->name] = std::move(it->env_deps); + state.var_env_deps.get_or_create(name_id) = std::move(it->env_deps); } } } @@ -920,8 +940,7 @@ auto process_bang_macro( parser::BangMacro const& macro ) -> Result { - // Store macro definition for later use - ctx.macros[macro.name] = BangMacroDef { + auto def = BangMacroDef { .name = macro.name, .foreach_ = macro.foreach_, .order_only_inputs = macro.order_only_inputs, @@ -933,6 +952,13 @@ auto process_bang_macro( .output_order_only_group = macro.output_order_only_group, .output_order_only_group_dir = macro.output_order_only_group_dir, }; + auto key = to_underlying(ctx.graph->intern(macro.name)); + auto it = std::lower_bound(ctx.macros.begin(), ctx.macros.end(), key, [](auto const& p, auto k) { return p.first < k; }); + if (it != ctx.macros.end() && it->first == key) { + it->second = std::move(def); + } else { + ctx.macros.insert(it, { key, std::move(def) }); + } return {}; } @@ -951,10 +977,9 @@ auto process_assignment( // Save current tracking state and clear for value expansion // This lets us capture which config/env vars are used in the RHS + // SortedIdVec moved-from state is empty, so no explicit clear() needed auto saved_config_vars = std::move(ctx.used_config_vars); auto saved_env_vars = std::move(ctx.used_env_vars); - ctx.used_config_vars.clear(); - ctx.used_env_vars.clear(); // Evaluate the value - callbacks will populate used_*_vars auto value = parser::expand(*ctx.eval, assign.value); @@ 
-986,25 +1011,26 @@ auto process_assignment( auto is_effective = true; // Helper to record transitive dependencies for this variable + auto name_id = ctx.graph->intern(*name); auto record_deps = [&]() { if (!captured_config_deps.empty()) { - auto& deps = state.var_config_deps[*name]; + auto& deps = state.var_config_deps.get_or_create(name_id); if (assign.op == parser::Assignment::Op::Set || assign.op == parser::Assignment::Op::Define || assign.op == parser::Assignment::Op::SoftSet) { deps = std::move(captured_config_deps); } else if (assign.op == parser::Assignment::Op::Append) { - deps.merge(captured_config_deps); + deps.merge_from(captured_config_deps); } } if (!captured_env_deps.empty()) { - auto& deps = state.var_env_deps[*name]; + auto& deps = state.var_env_deps.get_or_create(name_id); if (assign.op == parser::Assignment::Op::Set || assign.op == parser::Assignment::Op::Define || assign.op == parser::Assignment::Op::SoftSet) { deps = std::move(captured_env_deps); } else if (assign.op == parser::Assignment::Op::Append) { - deps.merge(captured_env_deps); + deps.merge_from(captured_env_deps); } } }; @@ -1051,7 +1077,7 @@ auto process_assignment( assign.op, value_before, value_after, - ctx.current_file.string(), + ctx.current_file.c_str(), assign.location.line, assign.location.column, is_effective @@ -1089,22 +1115,24 @@ auto process_conditional( parser::Conditional const& cond ) -> Result { - // Clear used_config_vars before evaluating condition to capture which vars are used - auto saved_config_vars = ctx.used_config_vars; - ctx.used_config_vars.clear(); + // Save and clear used_config_vars to capture which vars the condition uses + auto saved_config_vars = std::move(ctx.used_config_vars); // Evaluate condition value - this may use config vars like @(MODE) auto condition_true = parser::evaluate_condition(*ctx.eval, cond); // Capture config vars used in the condition expression - // These need to be dependencies for commands in both branches auto condition_vars = 
std::move(ctx.used_config_vars); ctx.used_config_vars = std::move(saved_config_vars); - // Add condition's config vars to the condition_config_vars set - // (accumulated for nested conditionals) - auto saved_condition_vars = ctx.condition_config_vars; - ctx.condition_config_vars.insert(condition_vars.begin(), condition_vars.end()); + // Save condition_config_vars, then merge in condition-specific vars + auto saved_condition_vars = std::move(ctx.condition_config_vars); + // Rebuild: copy saved entries + merge condition_vars + auto const* d = saved_condition_vars.data(); + for (std::size_t i = 0, n = saved_condition_vars.size(); i < n; ++i) { + ctx.condition_config_vars.insert(d[i]); + } + ctx.condition_config_vars.merge_from(condition_vars); auto restore_condition_vars = ScopeGuard([&] { ctx.condition_config_vars = std::move(saved_condition_vars); }); @@ -1174,30 +1202,28 @@ auto process_conditional( auto include_single_file( BuilderContext& ctx, BuilderState& state, - fs::path const& include_root, + std::string const& include_root, std::string const& include_path, bool is_rules ) -> Result { - if (ctx.included_files.contains(include_path)) { + auto include_path_id = to_underlying(ctx.graph->intern(include_path)); + if (ctx.included_files.contains(include_path_id)) { return {}; } - ctx.included_files.insert(include_path); + ctx.included_files.insert(include_path_id); - auto inc_rel = fs::relative(include_path, include_root).generic_string(); + auto inc_rel = pup::path::relative(include_path, include_root); auto inc_node_result = get_or_create_file_node(ctx, inc_rel, NodeType::File); if (inc_node_result) { ctx.sticky_sources.push_back(*inc_node_result); } - auto file = std::ifstream { include_path }; - if (!file) { + auto source_result = pup::platform::read_file(include_path); + if (!source_result) { return make_error(ErrorCode::IoError, "Cannot open include file: " + include_path); } - - auto ss = std::stringstream {}; - ss << file.rdbuf(); - auto source = 
std::string { ss.str() }; + auto source = std::move(*source_result); auto parse_result = parser::parse_tupfile(source, include_path); if (!parse_result.success()) { @@ -1210,9 +1236,9 @@ auto include_single_file( auto old_tup_cwd = std::string {}; if (is_rules && ctx.eval) { old_tup_cwd = ctx.eval->tup_cwd; - auto include_dir = fs::path { include_path }.parent_path(); - auto rel_path = fs::relative(include_dir, include_root / ctx.current_dir); - ctx.eval->tup_cwd = rel_path.empty() ? "." : rel_path.generic_string(); + auto include_dir = std::string { pup::path::parent(include_path) }; + auto rel_path = pup::path::relative(include_dir, pup::path::join(include_root, ctx.current_dir)); + ctx.eval->tup_cwd = rel_path.empty() ? "." : rel_path; } auto old_current_file = ctx.current_file; @@ -1246,9 +1272,9 @@ auto process_include( auto const& include_root = ctx.options.config_root.empty() ? ctx.options.source_root : ctx.options.config_root; if (inc.is_rules) { - auto tuprules_files = find_tuprules_files(include_root / ctx.current_dir, include_root); + auto tuprules_files = find_tuprules_files(pup::path::join(include_root, ctx.current_dir), include_root); for (auto const& tuprules : tuprules_files) { - auto result = include_single_file(ctx, state, include_root, tuprules.generic_string(), true); + auto result = include_single_file(ctx, state, include_root, tuprules, true); if (!result) { return pup::unexpected(result.error()); } @@ -1260,7 +1286,7 @@ auto process_include( if (!resolved) { return pup::unexpected(resolved.error()); } - return include_single_file(ctx, state, include_root, resolved->generic_string(), false); + return include_single_file(ctx, state, include_root, *resolved, false); } auto process_import( @@ -1278,8 +1304,13 @@ auto process_import( value = env_val; } // 2. 
Fall back to cached value from previous build (passed via options) - else if (auto it = state.options.cached_env_vars.find(imp.var_name); - it != state.options.cached_env_vars.end()) { + else if (auto it = std::lower_bound( + state.options.cached_env_vars.begin(), + state.options.cached_env_vars.end(), + imp.var_name, + [](auto const& p, auto const& k) { return p.first < k; } + ); + it != state.options.cached_env_vars.end() && it->first == imp.var_name) { value = it->second; } // 3. Fall back to default value @@ -1293,22 +1324,20 @@ auto process_import( // If no env, no cache, and no default, variable remains empty (tup behavior) // Create/update Variable node under $ directory for persistence + auto var_name_id = to_underlying(ctx.graph->intern(imp.var_name)); if (state.env_var_dir_id != INVALID_NODE_ID) { auto node_name = imp.var_name + "=" + value; auto content_hash = sha256(value); - // Check if we already have a node for this variable (from same build session) - auto it = state.imported_env_var_nodes.find(imp.var_name); + auto const* existing_node_id = state.imported_env_var_nodes.find(var_name_id); auto const name_id = ctx.graph->intern(node_name); - if (it != state.imported_env_var_nodes.end()) { - // Update existing node in-place if value changed - auto* existing = ctx.graph->get_file_node(it->second); + if (existing_node_id) { + auto* existing = ctx.graph->get_file_node(*existing_node_id); if (existing && existing->name != name_id) { existing->name = name_id; existing->content_hash = content_hash; } } else { - // Create new Variable node auto node = FileNode { .type = NodeType::Variable, .name = name_id, @@ -1317,7 +1346,7 @@ auto process_import( }; auto result = ctx.graph->add_file_node(std::move(node)); if (result) { - state.imported_env_var_nodes[imp.var_name] = *result; + state.imported_env_var_nodes.insert(var_name_id, *result); } } } @@ -1327,7 +1356,7 @@ auto process_import( } // Track this as an imported variable for fine-grained dependency 
tracking - state.imported_var_names.insert(imp.var_name); + state.imported_var_names.insert(var_name_id); return {}; } @@ -1339,7 +1368,7 @@ auto process_export( { // Per tup manual: "adds the environment variable VARIABLE to the export // list for future :-rules" - ctx.exported_vars.insert(exp.var_name); + ctx.exported_vars.insert(to_underlying(ctx.graph->intern(exp.var_name))); return {}; } @@ -1378,7 +1407,7 @@ auto expand_rule( auto primary_input = cmd_inputs.empty() ? std::string {} : cmd_inputs[0]; auto current_dir_name = ctx.current_dir.empty() ? std::string { "." } - : ctx.current_dir.filename().string(); + : std::string { pup::path::filename(ctx.current_dir) }; auto glob_match = glob_pattern.empty() ? std::string {} : parser::glob_match_extract(glob_pattern, primary_input); @@ -1401,11 +1430,13 @@ auto expand_rule( // Pre-resolve order-only group references so % can expand them in commands // This handles cross-directory groups like: | ../include/ |> cat % - auto rule_order_only_groups = std::unordered_map> {}; + // Stores known group names (sorted); the resolver constructs % on the fly. + auto rule_order_only_group_names = std::vector {}; // Track group NodeIds for deferred edge creation // Groups are first-class nodes; edges created after all Tupfiles are parsed - auto deferred_group_ids = std::set {}; + auto deferred_group_ids = NodeIdMap32 {}; + auto deferred_group_vec = std::vector {}; // Also check regular inputs for order-only group references // In tup, references are always order-only even when in the inputs section @@ -1427,11 +1458,11 @@ auto expand_rule( group_dir = normalize_group_dir(*expanded, ctx.current_dir, ctx.options.source_root); } } else { - group_dir = ctx.current_dir.empty() ? "." : ctx.current_dir.generic_string(); + group_dir = ctx.current_dir.empty() ? "." 
: ctx.current_dir; } // Demand-driven parsing: request the directory's Tupfile if not yet parsed - request_demand_driven_parse(*ctx.eval, fs::path { group_dir }); + request_demand_driven_parse(*ctx.eval, group_dir); // Get or create the Group node (groups are first-class nodes) auto group_id_result = get_or_create_group_node(ctx, state, group_dir, pattern.group_name); @@ -1441,8 +1472,11 @@ auto expand_rule( auto group_id = *group_id_result; // Preserve % literally — resolved after all Tupfiles are parsed - rule_order_only_groups[pattern.group_name] = { std::format("%<{}>", pattern.group_name) }; - deferred_group_ids.insert(group_id); + sorted_insert(rule_order_only_group_names, pattern.group_name); + if (!deferred_group_ids.contains(group_id)) { + deferred_group_ids.set(group_id, 1); + deferred_group_vec.push_back(group_id); + } } else if (!pattern.path.empty()) { // Path expression that may contain suffix: ../include/ auto expanded = parser::expand(*ctx.eval, pattern.path); @@ -1452,7 +1486,7 @@ auto expand_rule( auto group_ref = parse_group_reference(*expanded, ctx.current_dir, ctx.options.source_root); if (group_ref) { // Demand-driven parsing: request the directory's Tupfile if not yet parsed - request_demand_driven_parse(*ctx.eval, fs::path { group_ref->group_dir }); + request_demand_driven_parse(*ctx.eval, group_ref->group_dir); // Get or create the Group node (groups are first-class nodes) auto group_id_result = get_or_create_group_node(ctx, state, group_ref->group_dir, group_ref->group_name); @@ -1462,8 +1496,11 @@ auto expand_rule( auto group_id = *group_id_result; // Preserve % literally — resolved after all Tupfiles are parsed - rule_order_only_groups[group_ref->group_name] = { std::format("%<{}>", group_ref->group_name) }; - deferred_group_ids.insert(group_id); + sorted_insert(rule_order_only_group_names, group_ref->group_name); + if (!deferred_group_ids.contains(group_id)) { + deferred_group_ids.set(group_id, 1); + 
deferred_group_vec.push_back(group_id); + } } } } @@ -1473,21 +1510,24 @@ auto expand_rule( // ScopeGuard ensures restoration even on early returns. auto original_resolver = ctx.eval->resolve_order_only_group; auto resolver_guard = ScopeGuard([&] { ctx.eval->resolve_order_only_group = original_resolver; }); - ctx.eval->resolve_order_only_group = [&rule_order_only_groups, &deferred_group_ids, &ctx, &state](std::string_view name + ctx.eval->resolve_order_only_group = [&rule_order_only_group_names, &deferred_group_ids, &deferred_group_vec, &ctx, &state](std::string_view name ) -> std::vector { - auto it = rule_order_only_groups.find(std::string { name }); - if (it != rule_order_only_groups.end()) { - return it->second; + auto name_str = std::string { name }; + if (std::binary_search(rule_order_only_group_names.begin(), rule_order_only_group_names.end(), name_str)) { + return { std::format("%<{}>", name) }; } // Local group not in this rule's inputs — also defer - auto dir = ctx.current_dir.empty() ? "." : ctx.current_dir.generic_string(); - auto key = GroupKey { dir, std::string { name } }; - auto found = state.group_nodes.find(key); - if (found != state.group_nodes.end()) { - deferred_group_ids.insert(found->second); - auto pattern = std::vector { std::format("%<{}>", name) }; - rule_order_only_groups[std::string { name }] = pattern; - return pattern; + auto dir = ctx.current_dir.empty() ? std::string { "." 
} : ctx.current_dir; + auto key_str = dir + "/" + name_str; + auto key_id = to_underlying(ctx.graph->intern(key_str)); + auto const* node_id = state.group_nodes.find(key_id); + if (node_id) { + if (!deferred_group_ids.contains(*node_id)) { + deferred_group_ids.set(*node_id, 1); + deferred_group_vec.push_back(*node_id); + } + sorted_insert(rule_order_only_group_names, name_str); + return { std::format("%<{}>", name) }; } return {}; }; @@ -1574,7 +1614,7 @@ auto expand_rule( .inputs = file_inputs, .order_only_inputs = order_only_paths, .outputs = *outputs, - .working_dir = ctx.current_dir.generic_string(), + .working_dir = ctx.current_dir, }; // Use scanner_registry (new modular approach) if available, fall back to pattern_registry @@ -1655,7 +1695,8 @@ auto expand_rule( output_group = macro_ptr->output_group; } if (output_group && is_context_active(ctx)) { - ctx.groups[*output_group].push_back(*output_id); + auto gkey = to_underlying(ctx.graph->intern(*output_group)); + ctx.groups.get_or_create(gkey).push_back(*output_id); } // Add to order-only group if specified @@ -1686,7 +1727,7 @@ auto expand_rule( } if (dir.empty()) { - dir = ctx.current_dir.empty() ? "." : ctx.current_dir.generic_string(); + dir = ctx.current_dir.empty() ? "." 
: ctx.current_dir; } // Create or get the Group node @@ -1718,8 +1759,12 @@ auto expand_rule( // Store deferred edges for groups // These will be resolved after all Tupfiles are parsed (group might grow) - for (auto group_id : deferred_group_ids) { - state.deferred_edges.insert({ group_id, *cmd_id }); + for (auto group_id : deferred_group_vec) { + auto edge = DeferredOrderOnlyEdge { group_id, *cmd_id }; + auto pos = std::lower_bound(state.deferred_edges.begin(), state.deferred_edges.end(), edge); + if (pos == state.deferred_edges.end() || !(*pos == edge)) { + state.deferred_edges.insert(pos, edge); + } } return {}; @@ -1739,9 +1784,9 @@ auto expand_inputs( if (pattern.is_group) { // Bin reference {name} - local to Tupfile - auto it = decltype(ctx.groups)::iterator { ctx.groups.find(pattern.group_name) }; - if (it != ctx.groups.end()) { - for (auto id : it->second) { + auto gkey = to_underlying(ctx.graph->intern(pattern.group_name)); + if (auto const* members = ctx.groups.find(gkey)) { + for (auto id : *members) { auto path = ctx.graph->get_full_path(id); if (!path.empty()) { result.push_back(std::move(path)); @@ -1760,10 +1805,10 @@ auto expand_inputs( auto expanded = parser::expand(*ctx.eval, pattern.path); if (expanded) { group_dir = normalize_group_dir(*expanded, ctx.current_dir, ctx.options.source_root); - request_demand_driven_parse(*ctx.eval, fs::path { group_dir }); + request_demand_driven_parse(*ctx.eval, group_dir); } } else { - group_dir = ctx.current_dir.empty() ? "." : ctx.current_dir.generic_string(); + group_dir = ctx.current_dir.empty() ? "." : ctx.current_dir; } // Return the group reference string so GeneratedRules (DEP commands) can inherit it. 
@@ -1784,7 +1829,7 @@ auto expand_inputs( // Check for path/ pattern (order-only group reference with directory prefix) auto group_ref = parse_group_reference(path, ctx.current_dir, ctx.options.source_root); if (group_ref) { - request_demand_driven_parse(*ctx.eval, fs::path { group_ref->group_dir }); + request_demand_driven_parse(*ctx.eval, group_ref->group_dir); // Return the group reference string so GeneratedRules (DEP commands) can inherit it. // Edges are created by resolve_deferred_order_only_edges() after all Tupfiles are parsed. result.push_back(path); @@ -1793,7 +1838,7 @@ auto expand_inputs( // Include the path (pattern or literal) // For globs, this preserves the pattern for %g expansion in foreach rules if (!ctx.current_dir.empty()) { - result.push_back((ctx.current_dir / path).lexically_normal().generic_string()); + result.push_back(pup::path::normalize(pup::path::join(ctx.current_dir, path))); } else { result.push_back(path); } @@ -1804,10 +1849,10 @@ auto expand_inputs( } else if (!parser::has_glob_chars(path)) { // Non-glob path: trigger demand-driven parsing if file doesn't exist // (path already added above, but we may need to request cross-directory Tupfile) - auto full_path = ctx.options.source_root / ctx.current_dir / path; - if (!std::filesystem::exists(full_path)) { - auto file_dir = fs::path { path }.parent_path(); - auto abs_file_dir = (ctx.current_dir / file_dir).lexically_normal(); + auto full_path = pup::path::join(pup::path::join(ctx.options.source_root, ctx.current_dir), path); + if (!pup::platform::exists(full_path)) { + auto file_dir = std::string { pup::path::parent(path) }; + auto abs_file_dir = pup::path::normalize(pup::path::join(ctx.current_dir, file_dir)); request_demand_driven_parse(*ctx.eval, abs_file_dir); } } @@ -1848,7 +1893,7 @@ auto expand_outputs( // Combine with current directory and normalize // Output paths are relative to Tupfile directory - auto full_output_path = (ctx.current_dir / 
output_path).lexically_normal().generic_string(); + auto full_output_path = pup::path::normalize(pup::path::join(ctx.current_dir, output_path)); // All outputs go under BUILD_ROOT_ID. // This ensures Ghost nodes (created for inputs referencing not-yet-generated files) @@ -1917,27 +1962,23 @@ auto expand_command( auto get_or_create_directory_node( BuilderContext& ctx, - std::filesystem::path const& dir_path, + std::string const& dir_path, int depth ) -> Result { - // Normalize first to handle ., .., and redundant separators - auto normalized_path = dir_path.lexically_normal(); + auto normalized_path = pup::path::normalize(dir_path); - // Root directory (empty, ".", or "/") has no parent - return 0 if (normalized_path.empty() || normalized_path == "." || normalized_path == "/") { return NodeId { 0 }; } - // Guard against pathological recursion if (depth > MAX_DIRECTORY_DEPTH) { return make_error(ErrorCode::InvalidArgument, "Directory nesting exceeds maximum depth"); } - auto parent_path = normalized_path.parent_path(); - auto basename = normalized_path.filename().string(); + auto parent_path = std::string { pup::path::parent(normalized_path) }; + auto basename = std::string { pup::path::filename(normalized_path) }; - // Recurse to get/create parent directory auto parent_id_result = get_or_create_directory_node(ctx, parent_path, depth + 1); if (!parent_id_result) { return parent_id_result; @@ -1996,14 +2037,11 @@ auto get_or_create_file_node( // Paths that escape source root become absolute for correct stat() resolution auto resolved = std::string { path }; if (!ctx.current_dir.empty() && path.starts_with("..")) { - auto normalized = (ctx.current_dir / path).lexically_normal(); - // Check if path escapes source root (starts with ..) 
- if (normalized.string().starts_with("..")) { - // Use absolute path for out-of-tree files (without resolving symlinks) - auto abs = (ctx.options.source_root / normalized).lexically_normal(); - resolved = abs.generic_string(); + auto normalized = pup::path::normalize(pup::path::join(ctx.current_dir, path)); + if (normalized.starts_with("..")) { + resolved = pup::path::normalize(pup::path::join(ctx.options.source_root, normalized)); } else { - resolved = normalized.generic_string(); + resolved = normalized; } } @@ -2025,11 +2063,9 @@ auto get_or_create_file_node( return walk_to_file_node(*ctx.graph, BUILD_ROOT_ID, normalized, NodeType::Generated); } - auto fs_path = fs::path { normalized }; - auto basename = fs_path.filename().string(); + auto basename = std::string { pup::path::filename(normalized) }; - // Get or create parent directory node - auto parent_path = fs_path.parent_path(); + auto parent_path = std::string { pup::path::parent(normalized) }; auto parent_id_result = get_or_create_directory_node(ctx, parent_path); if (!parent_id_result) { return parent_id_result; @@ -2107,8 +2143,8 @@ auto resolve_input_node( } // Node doesn't exist - check filesystem to determine type - auto source_path = ctx.options.source_root / normalized_path; - if (fs::exists(source_path)) { + auto source_path = pup::path::join(ctx.options.source_root, normalized_path); + if (pup::platform::exists(source_path)) { // Source file exists - create File node under SOURCE_ROOT_ID return walk_to_file_node(*ctx.graph, SOURCE_ROOT_ID, normalized_path, NodeType::File); } @@ -2116,15 +2152,15 @@ auto resolve_input_node( // In 3-tree builds, files may live in config_root (alongside Tupfiles) rather than // source_root. Check config_root as a fallback for source file resolution. 
if (!ctx.options.config_root.empty() && ctx.options.config_root != ctx.options.source_root) { - auto config_path = ctx.options.config_root / normalized_path; - if (fs::exists(config_path)) { + auto config_path = pup::path::join(ctx.options.config_root, normalized_path); + if (pup::platform::exists(config_path)) { return walk_to_file_node(*ctx.graph, SOURCE_ROOT_ID, normalized_path, NodeType::File); } } // Check if file exists in build directory (e.g., tup.config, or already-generated files) - auto build_path = ctx.options.output_root / normalized_path; - if (fs::exists(build_path)) { + auto build_path = pup::path::join(ctx.options.output_root, normalized_path); + if (pup::platform::exists(build_path)) { // File exists in build dir but not source - it's a Generated output from a previous build. // Create as Ghost so the rule that generates it can upgrade it to Generated. // (If the rule no longer generates it, the Ghost remains and causes an error.) @@ -2144,10 +2180,11 @@ auto get_or_create_group_node( ) -> Result { // Check cache first (fast path) - auto key = GroupKey { directory, name }; - auto it = state.group_nodes.find(key); - if (it != state.group_nodes.end()) { - return it->second; + auto key_str = directory + "/" + name; + auto key_id = to_underlying(ctx.graph->intern(key_str)); + auto const* cached = state.group_nodes.find(key_id); + if (cached) { + return *cached; } // Get or create parent directory node @@ -2161,7 +2198,7 @@ auto get_or_create_group_node( // Group nodes are stored with angle-bracket name like "" auto group_basename = "<" + name + ">"; if (auto existing = ctx.graph->find_by_dir_name(parent_id, group_basename)) { - state.group_nodes[key] = *existing; + state.group_nodes.insert(key_id, *existing); return *existing; } @@ -2174,7 +2211,7 @@ auto get_or_create_group_node( auto result = ctx.graph->add_file_node(std::move(node)); if (result) { - state.group_nodes[key] = *result; + state.group_nodes.insert(key_id, *result); } return result; } @@ 
-2186,18 +2223,15 @@ auto create_command_node( std::string const& display ) -> Result { - // Intern exported_vars - auto exported_var_ids = std::set {}; - for (auto const& var : ctx.exported_vars) { - exported_var_ids.insert(ctx.graph->intern(var)); - } + auto exported = SortedIdVec {}; + exported.merge_from(ctx.exported_vars); auto node = CommandNode { .display = ctx.graph->intern(display), - .source_dir = ctx.graph->intern(ctx.current_dir.generic_string()), + .source_dir = ctx.graph->intern(ctx.current_dir), .instruction_id = ctx.graph->intern(instruction), - .exported_vars = std::move(exported_var_ids), - .guards = ctx.condition_stack, // Apply current guards from condition stack + .exported_vars = std::move(exported), + .guards = ctx.condition_stack, }; auto cmd_id_result = ctx.graph->add_command_node(std::move(node)); @@ -2213,28 +2247,29 @@ auto create_command_node( } // Add sticky edges from used config variables (fine-grained dependency tracking) - for (auto const& var_name : ctx.used_config_vars) { - auto it = state.config_var_nodes.find(var_name); - if (it != state.config_var_nodes.end()) { - (void)ctx.graph->add_edge(it->second, cmd_id, LinkType::Sticky); + auto const* cv = ctx.used_config_vars.data(); + for (std::size_t i = 0, n = ctx.used_config_vars.size(); i < n; ++i) { + auto const* node_id = state.config_var_nodes.find(cv[i]); + if (node_id) { + (void)ctx.graph->add_edge(*node_id, cmd_id, LinkType::Sticky); } } // Add sticky edges from condition config variables (phi-node model) - // Commands inside conditionals need to depend on the config vars used in the condition - // so they rebuild when the condition's value changes - for (auto const& var_name : ctx.condition_config_vars) { - auto it = state.config_var_nodes.find(var_name); - if (it != state.config_var_nodes.end()) { - (void)ctx.graph->add_edge(it->second, cmd_id, LinkType::Sticky); + auto const* ccv = ctx.condition_config_vars.data(); + for (std::size_t i = 0, n = 
ctx.condition_config_vars.size(); i < n; ++i) { + auto const* node_id = state.config_var_nodes.find(ccv[i]); + if (node_id) { + (void)ctx.graph->add_edge(*node_id, cmd_id, LinkType::Sticky); } } // Add sticky edges from used imported env variables (fine-grained dependency tracking) - for (auto const& var_name : ctx.used_env_vars) { - auto it = state.imported_env_var_nodes.find(var_name); - if (it != state.imported_env_var_nodes.end()) { - (void)ctx.graph->add_edge(it->second, cmd_id, LinkType::Sticky); + auto const* uev = ctx.used_env_vars.data(); + for (std::size_t i = 0, n = ctx.used_env_vars.size(); i < n; ++i) { + auto const* node_id = state.imported_env_var_nodes.find(uev[i]); + if (node_id) { + (void)ctx.graph->add_edge(*node_id, cmd_id, LinkType::Sticky); } } @@ -2249,19 +2284,9 @@ auto create_command_node( auto make_builder_state(BuilderOptions opts) -> BuilderState { - return BuilderState { - .options = std::move(opts), - .errors = {}, - .warnings = {}, - .group_nodes = {}, - .deferred_edges = {}, - .config_var_nodes = {}, - .env_var_dir_id = INVALID_NODE_ID, - .imported_env_var_nodes = {}, - .imported_var_names = {}, - .var_config_deps = {}, - .var_env_deps = {}, - }; + auto state = BuilderState {}; + state.options = std::move(opts); + return state; } auto build_graph( @@ -2276,7 +2301,7 @@ auto build_graph( // For in-tree builds (source == output), this is empty. // For variant builds (-B build), this is "build". if (state.options.source_root != state.options.output_root) { - auto build_root_name = fs::relative(state.options.output_root, state.options.source_root).generic_string(); + auto build_root_name = pup::path::relative(state.options.output_root, state.options.source_root); graph.set_build_root_name(std::move(build_root_name)); } @@ -2301,10 +2326,10 @@ auto add_tupfile( auto const& tupfile_root = state.options.config_root.empty() ? 
state.options.source_root : state.options.config_root; - auto tupfile_parent = std::filesystem::path { tupfile.filename }.parent_path(); - auto relative_dir = std::filesystem::relative(tupfile_parent, tupfile_root); - if (relative_dir == ".") { - relative_dir = ""; + auto tupfile_parent = std::string { pup::path::parent(tupfile.filename) }; + auto relative_dir_str = pup::path::relative(tupfile_parent, tupfile_root); + if (relative_dir_str == ".") { + relative_dir_str = ""; } auto ctx = BuilderContext { @@ -2312,13 +2337,13 @@ auto add_tupfile( .eval = &eval, .vars = eval.vars, .options = state.options, - .current_dir = relative_dir, + .current_dir = relative_dir_str, .current_file = tupfile.filename, }; // Create Tupfile node and add to sticky_sources for dependency tracking // For 3-tree builds, store relative to config_root (Tupfile's actual location) - auto tupfile_rel = std::filesystem::relative(tupfile.filename, tupfile_root).generic_string(); + auto tupfile_rel = pup::path::relative(tupfile.filename, tupfile_root); auto tupfile_node_result = get_or_create_file_node(ctx, tupfile_rel, NodeType::File); if (tupfile_node_result) { ctx.sticky_sources.push_back(*tupfile_node_result); @@ -2331,8 +2356,8 @@ auto add_tupfile( // Get config directory for Variable node parent (typically the -B directory) auto config_dir_id = NodeId { 0 }; if (!state.options.config_path.empty()) { - auto config_parent = std::filesystem::path { state.options.config_path }.parent_path(); - auto config_dir_rel = std::filesystem::relative(config_parent, state.options.source_root).generic_string(); + auto config_parent = std::string { pup::path::parent(state.options.config_path) }; + auto config_dir_rel = pup::path::relative(config_parent, state.options.source_root); if (config_dir_rel.empty() || config_dir_rel == ".") { config_dir_rel = ""; } @@ -2343,14 +2368,14 @@ auto add_tupfile( } for (auto const& var_name : eval.config_vars->names()) { - // Skip CONFIG_ prefixed names (we store the 
stripped version) if (var_name.starts_with(parser::builtin_vars::CONFIG_)) { continue; } - // Check if node already exists (shouldn't happen with empty check above, but defensive) + auto var_name_id = to_underlying(graph.intern(var_name)); + if (auto existing = graph.find_by_dir_name(config_dir_id, var_name)) { - state.config_var_nodes[std::string { var_name }] = *existing; + state.config_var_nodes.insert(var_name_id, *existing); continue; } @@ -2364,7 +2389,7 @@ auto add_tupfile( auto var_id_result = graph.add_file_node(std::move(node)); if (var_id_result) { - state.config_var_nodes[std::string { var_name }] = *var_id_result; + state.config_var_nodes.insert(var_name_id, *var_id_result); } } } @@ -2389,32 +2414,34 @@ auto add_tupfile( } } + // Thread string pool into EvalContext for StringId lookups + eval.string_pool = &graph.graph().strings; + // Set up callback to track which config variables are used during expansion eval.on_config_var_used = [&ctx](std::string_view name) { - ctx.used_config_vars.insert(std::string { name }); + ctx.used_config_vars.insert(to_underlying(ctx.graph->intern(name))); }; // Set up callback to track which imported env variables are used during expansion eval.imported_vars = &state.imported_var_names; eval.on_env_var_used = [&ctx](std::string_view name) { - ctx.used_env_vars.insert(std::string { name }); + ctx.used_env_vars.insert(to_underlying(ctx.graph->intern(name))); }; - // Wire up transitive dependency maps for variable tracking - // When $(CXXFLAGS) is expanded and CXXFLAGS depends on @(RELEASE_CXXFLAGS), - // the propagation in eval.cpp will call on_config_var_used("RELEASE_CXXFLAGS") + // Wire up transitive dependency trackers for variable tracking eval.var_config_deps = &state.var_config_deps; eval.var_env_deps = &state.var_env_deps; // Set up resolve_group callback for {group} pattern expansion eval.resolve_group = [&ctx](std::string_view name ) -> std::vector { - auto it = ctx.groups.find(std::string { name }); - if (it == 
ctx.groups.end()) { + auto gkey = to_underlying(ctx.graph->intern(name)); + auto const* members = ctx.groups.find(gkey); + if (!members) { return {}; } auto paths = std::vector {}; - for (auto id : it->second) { + for (auto id : *members) { auto path = ctx.graph->get_full_path(id); if (!path.empty()) { paths.push_back(std::move(path)); @@ -2428,14 +2455,15 @@ auto add_tupfile( // Groups are first-class nodes; lookup via graph edges (file → group) eval.resolve_order_only_group = [&ctx, &state](std::string_view name ) -> std::vector { - auto dir = ctx.current_dir.empty() ? "." : ctx.current_dir.generic_string(); - auto key = GroupKey { dir, std::string { name } }; - auto it = state.group_nodes.find(key); - if (it == state.group_nodes.end()) { + auto dir = ctx.current_dir.empty() ? std::string { "." } : ctx.current_dir; + auto key_str = dir + "/" + std::string { name }; + auto key_id = to_underlying(ctx.graph->intern(key_str)); + auto const* node_id = state.group_nodes.find(key_id); + if (!node_id) { return {}; } auto paths = std::vector {}; - auto members = get_group_members(*ctx.graph, it->second); + auto members = get_group_members(*ctx.graph, *node_id); for (auto id : members) { auto path = ctx.graph->get_full_path(id); if (!path.empty()) { @@ -2476,8 +2504,12 @@ auto resolve_deferred_order_only_edges( { // Pass 1: Create graph edges and accumulate members per (command, group_name). // Same-named groups from different directories contribute to the same replacement. 
- using MemberKey = std::pair; // (command_id, group_name) - auto accumulated = std::map> {}; + // Key: packed (command_id << 32 | interned_group_name) + auto pack_key = [](NodeId cmd, std::uint32_t name_id) -> std::uint64_t { + return (static_cast(cmd) << 32) | name_id; + }; + + auto accumulated = std::vector>> {}; for (auto const& edge : state.deferred_edges) { auto const* group_node = graph.get_file_node(edge.group_id); @@ -2494,17 +2526,25 @@ auto resolve_deferred_order_only_edges( (void)graph.add_order_only_edge(edge.group_id, edge.command_id); - auto group_basename = std::string { graph.str(group_node->name) }; + auto group_basename = std::string_view { graph.str(group_node->name) }; if (group_basename.size() > 2 && group_basename.front() == '<' && group_basename.back() == '>') { - auto group_name = group_basename.substr(1, group_basename.size() - 2); - auto& all_members = accumulated[{ edge.command_id, group_name }]; - all_members.insert(all_members.end(), members.begin(), members.end()); + auto bare_name = group_basename.substr(1, group_basename.size() - 2); + auto name_id = to_underlying(graph.intern(bare_name)); + auto key = pack_key(edge.command_id, name_id); + auto it = std::lower_bound(accumulated.begin(), accumulated.end(), key, [](auto const& p, auto k) { return p.first < k; }); + if (it != accumulated.end() && it->first == key) { + it->second.insert(it->second.end(), members.begin(), members.end()); + } else { + accumulated.insert(it, { key, std::move(members) }); + } } } // Pass 2: Replace % patterns with the full accumulated member lists. 
for (auto const& [key, members] : accumulated) { - auto const& [command_id, group_name] = key; + auto command_id = static_cast(key >> 32); + auto name_id = static_cast(key & 0xFFFFFFFF); + auto group_name = std::string { graph.str(name_id) }; auto pattern = std::format("%<{}>", group_name); auto* cmd_node = graph.get_command_node(command_id); @@ -2517,10 +2557,13 @@ auto resolve_deferred_order_only_edges( } auto source_dir_str = std::string { graph.str(cmd_node->source_dir) }; - auto canonical_cwd = fs::path {}; + auto canonical_cwd = std::string {}; if (!state.options.source_root.empty() && !state.options.output_root.empty() && state.options.source_root != state.options.output_root) { - canonical_cwd = fs::weakly_canonical(state.options.source_root / source_dir_str); + auto r = pup::platform::canonical(pup::path::join(state.options.source_root, source_dir_str)); + if (r) { + canonical_cwd = *r; + } } auto tc = PathTransformContext { .source_to_root = pup::compute_source_to_root(source_dir_str), diff --git a/src/graph/dag.cpp b/src/graph/dag.cpp index 50b6be2..9c59f79 100644 --- a/src/graph/dag.cpp +++ b/src/graph/dag.cpp @@ -6,10 +6,12 @@ #include "pup/core/metrics.hpp" #include "pup/core/path_utils.hpp" +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" + #include #include #include -#include namespace pup::graph { @@ -17,16 +19,12 @@ auto make_graph() -> Graph { auto graph = Graph {}; - // Initialize dir_name_index with pool pointer for transparent lookup - graph.dir_name_index = std::unordered_map( - 0, DirNameKeyHash { &graph.strings }, DirNameKeyEqual { &graph.strings } - ); - // Reserve BUILD_ROOT_ID (1) for the build root node. // All Generated/Ghost nodes will be parented under this node. // The build root's filesystem location is determined at build time // (source_root for in-tree, output_root for variant builds). 
graph.files.resize(2); // Index 0 unused, index 1 = build root + graph.dir_children.resize(2); graph.files[1] = FileNode { .id = BUILD_ROOT_ID, .type = NodeType::Directory, @@ -76,16 +74,18 @@ auto add_file_node(Graph& graph, FileNode node) -> Result auto const id = graph.next_file_id++; node.id = id; - if (!is_empty(node.name)) { - graph.dir_name_index[DirNameKey { node.parent_dir, node.name }] = id; - } - auto const idx = node_id::index(id); if (idx >= graph.files.size()) { graph.files.resize(idx + 1); + graph.dir_children.resize(idx + 1); } graph.files[idx] = std::move(node); + if (!is_empty(graph.files[idx].name)) { + auto const parent_idx = node_id::index(graph.files[idx].parent_dir); + graph.dir_children[parent_idx].insert(to_underlying(graph.files[idx].name), id); + } + return id; } @@ -112,16 +112,19 @@ auto add_edge(Graph& graph, NodeId from, NodeId to, LinkType type) -> Result(ErrorCode::InvalidNodeId, "Invalid destination node ID"); } - auto const edge_idx = graph.edges.size(); + assert(graph.edges.size() < UINT32_MAX); + auto const edge_idx = static_cast(graph.edges.size()); graph.edges.push_back(Edge { .from = from, .to = to, .type = type, }); - // Update edge indices - graph.edges_from_index[from].push_back(edge_idx); - graph.edges_to_index[to].push_back(edge_idx); + auto old_from = graph.edges_from_index.get_slice(from); + graph.edges_from_index.set_slice(from, graph.edge_arena.append_extend(old_from, edge_idx)); + + auto old_to = graph.edges_to_index.get_slice(to); + graph.edges_to_index.set_slice(to, graph.edge_arena.append_extend(old_to, edge_idx)); return {}; } @@ -135,9 +138,11 @@ auto add_order_only_edge(Graph& graph, NodeId from, NodeId to) -> Result return make_error(ErrorCode::InvalidNodeId, "Invalid destination node ID"); } - // Order-only edges: 'to' depends on 'from' for ordering (not content) - graph.order_only_to_index[to].push_back(from); - graph.order_only_dependents[from].push_back(to); + auto old_to = 
graph.order_only_to_index.get_slice(to); + graph.order_only_to_index.set_slice(to, graph.edge_arena.append_extend(old_to, from)); + + auto old_deps = graph.order_only_dependents.get_slice(from); + graph.order_only_dependents.set_slice(from, graph.edge_arena.append_extend(old_deps, to)); return {}; } @@ -280,23 +285,33 @@ auto is_guard_satisfied(Graph const& graph, CommandNode const& cmd) -> bool auto find_by_dir_name(Graph const& graph, NodeId parent_dir, std::string_view name) -> std::optional { - // Zero-allocation lookup using transparent hash/equal - auto view = DirNameKeyView { parent_dir, name }; - auto it = graph.dir_name_index.find(view); - if (it != graph.dir_name_index.end()) { - return it->second; + auto name_id = graph.strings.find(name); + if (is_empty(name_id)) { + return std::nullopt; } - return std::nullopt; + auto const parent_idx = node_id::index(parent_dir); + if (parent_idx >= graph.dir_children.size()) { + return std::nullopt; + } + auto const* found = graph.dir_children[parent_idx].find(to_underlying(name_id)); + if (!found) { + return std::nullopt; + } + return *found; } auto find_by_command(Graph const& graph, std::string_view cmd) -> std::optional { assert(graph.command_index_built && "find_by_command() requires build_command_index() first"); - auto it = graph.command_str_index.find(cmd); - if (it != graph.command_str_index.end()) { - return it->second; + auto cmd_id = graph.command_strings.find(cmd); + if (is_empty(cmd_id)) { + return std::nullopt; + } + auto const* found = graph.command_index.find(to_underlying(cmd_id)); + if (!found) { + return std::nullopt; } - return std::nullopt; + return *found; } auto find_by_path(Graph const& graph, std::string_view path) -> std::optional @@ -310,11 +325,13 @@ auto find_by_path(Graph const& graph, std::string_view path, NodeId root) -> std return std::nullopt; } - auto p = std::filesystem::path { path }; auto parent_id = root; + auto remaining = path; - for (auto const& component : p) { - auto name 
= component.string(); + while (!remaining.empty()) { + auto slash = remaining.find('/'); + auto name = std::string { slash == std::string_view::npos ? remaining : remaining.substr(0, slash) }; + remaining = (slash == std::string_view::npos) ? std::string_view {} : remaining.substr(slash + 1); if (name.empty() || name == ".") { continue; } @@ -358,14 +375,14 @@ auto nodes_of_type(Graph const& graph, NodeType type) -> std::vector auto get_inputs(Graph const& graph, NodeId id) -> std::vector { - auto it = graph.edges_to_index.find(id); - if (it == graph.edges_to_index.end()) { + auto s = graph.edges_to_index.get_slice(id); + if (s.length == 0) { return {}; } - + auto span = graph.edge_arena.slice(s); auto result = std::vector {}; - result.reserve(it->second.size()); - for (auto idx : it->second) { + result.reserve(span.size()); + for (auto idx : span) { result.push_back(graph.edges[idx].from); } return result; @@ -373,13 +390,13 @@ auto get_inputs(Graph const& graph, NodeId id) -> std::vector auto get_outputs(Graph const& graph, NodeId id) -> std::vector { - auto it = graph.edges_from_index.find(id); - if (it == graph.edges_from_index.end()) { + auto s = graph.edges_from_index.get_slice(id); + if (s.length == 0) { return {}; } - + auto span = graph.edge_arena.slice(s); auto result = std::vector {}; - for (auto idx : it->second) { + for (auto idx : span) { auto const& edge = graph.edges[idx]; if (edge.type != LinkType::Sticky) { result.push_back(edge.to); @@ -390,13 +407,13 @@ auto get_outputs(Graph const& graph, NodeId id) -> std::vector auto get_sticky_outputs(Graph const& graph, NodeId id) -> std::vector { - auto it = graph.edges_from_index.find(id); - if (it == graph.edges_from_index.end()) { + auto s = graph.edges_from_index.get_slice(id); + if (s.length == 0) { return {}; } - + auto span = graph.edge_arena.slice(s); auto result = std::vector {}; - for (auto idx : it->second) { + for (auto idx : span) { auto const& edge = graph.edges[idx]; if (edge.type == 
LinkType::Sticky) { result.push_back(edge.to); @@ -407,20 +424,22 @@ auto get_sticky_outputs(Graph const& graph, NodeId id) -> std::vector auto get_order_only(Graph const& graph, NodeId id) -> std::vector { - auto it = graph.order_only_to_index.find(id); - if (it != graph.order_only_to_index.end()) { - return it->second; + auto s = graph.order_only_to_index.get_slice(id); + if (s.length == 0) { + return {}; } - return {}; + auto span = graph.edge_arena.slice(s); + return { span.begin(), span.end() }; } auto get_order_only_dependents(Graph const& graph, NodeId id) -> std::vector { - auto it = graph.order_only_dependents.find(id); - if (it != graph.order_only_dependents.end()) { - return it->second; + auto s = graph.order_only_dependents.get_slice(id); + if (s.length == 0) { + return {}; } - return {}; + auto span = graph.edge_arena.slice(s); + return { span.begin(), span.end() }; } auto node_count(Graph const& graph) -> std::size_t @@ -450,30 +469,32 @@ auto clear(Graph& graph) -> void graph.conditions.clear(); graph.phi_nodes.clear(); graph.edges.clear(); + graph.edge_arena.clear(); graph.edges_to_index.clear(); graph.edges_from_index.clear(); graph.order_only_to_index.clear(); graph.order_only_dependents.clear(); - graph.dir_name_index.clear(); - graph.command_str_index.clear(); + graph.dir_children.clear(); + graph.command_strings.clear(); + graph.command_index.clear(); + graph.command_index_built = false; graph.strings.clear(); - // Re-initialize dir_name_index with pool pointer - graph.dir_name_index = std::unordered_map( - 0, DirNameKeyHash { &graph.strings }, DirNameKeyEqual { &graph.strings } - ); - // Re-intern build root name auto build_root_name = graph.strings.intern(build_root_name_str); // Reinitialize build root node (same as make_graph) graph.files.resize(2); + graph.dir_children.resize(2); graph.files[1] = FileNode { .id = BUILD_ROOT_ID, .type = NodeType::Directory, .name = build_root_name, .parent_dir = SOURCE_ROOT_ID, }; + if 
(!is_empty(build_root_name)) { + graph.dir_children[0].insert(to_underlying(build_root_name), BUILD_ROOT_ID); + } graph.next_file_id = 2; graph.next_command_id = node_id::make_command(1); graph.next_condition_id = node_id::make_condition(1); @@ -527,12 +548,12 @@ auto root_nodes(Graph const& graph) -> std::vector auto leaf_nodes(Graph const& graph) -> std::vector { auto has_outputs = [&](NodeId id) { - auto it = graph.edges_from_index.find(id); - if (it == graph.edges_from_index.end()) { + auto s = graph.edges_from_index.get_slice(id); + if (s.length == 0) { return false; } - // Check for non-sticky outputs - for (auto idx : it->second) { + auto span = graph.edge_arena.slice(s); + for (auto idx : span) { if (graph.edges[idx].type != LinkType::Sticky) { return true; } @@ -573,14 +594,15 @@ auto get_full_path(Graph const& graph, NodeId id, PathCache& cache) -> std::stri return ""; } - if (auto it = cache.find(id); it != cache.end()) { - if (it->second.empty()) { + if (cache.ids.contains(id)) { + auto sid = make_string_id(cache.ids.get(id)); + if (is_empty(sid)) { return name; } - return it->second; + return cache.pool.get(sid); } - cache[id] = ""; + cache.ids.set(id, 0); auto path = std::string {}; if (node->parent_dir != 0) { @@ -598,8 +620,9 @@ auto get_full_path(Graph const& graph, NodeId id, PathCache& cache) -> std::stri path = std::string { name }; } - cache[id] = path; - return cache[id]; + auto path_id = cache.pool.intern(path); + cache.ids.set(id, to_underlying(path_id)); + return cache.pool.get(path_id); } auto get_full_path(Graph const& graph, NodeId id) -> std::string @@ -610,22 +633,28 @@ auto get_full_path(Graph const& graph, NodeId id) -> std::string auto invalidate_path_cache(PathCache& cache, NodeId id) -> void { - cache.erase(id); + cache.ids.remove(id); } auto clear_path_cache(PathCache& cache) -> void { - cache.clear(); + cache.ids.clear(); + cache.pool.clear(); } auto set_build_root_name(Graph& graph, std::string name) -> void { + auto old_name = 
graph.files[BUILD_ROOT_ID].name; + if (!is_empty(old_name)) { + graph.dir_children[0].remove(to_underlying(old_name)); + } + auto name_id = graph.strings.intern(name); graph.files[BUILD_ROOT_ID].name = name_id; - // Register in dir_name_index so lookups for "build" find BUILD_ROOT_ID - // (BUILD_ROOT_ID was created with empty name, so wasn't indexed initially) - graph.dir_name_index[DirNameKey { SOURCE_ROOT_ID, name_id }] = BUILD_ROOT_ID; + if (!is_empty(name_id)) { + graph.dir_children[0].insert(to_underlying(name_id), BUILD_ROOT_ID); + } } auto get_build_root_name(Graph const& graph) -> std::string_view @@ -672,20 +701,21 @@ namespace { auto path_basename(std::string_view path) -> std::string_view { - auto pos = path.rfind('/'); - return pos == std::string_view::npos ? path : path.substr(pos + 1); + return pup::path::filename(path); } auto path_stem(std::string_view name) -> std::string_view { - auto pos = name.rfind('.'); - return pos == std::string_view::npos ? name : name.substr(0, pos); + return pup::path::stem(name); } auto path_extension(std::string_view name) -> std::string_view { - auto pos = name.rfind('.'); - return pos == std::string_view::npos ? std::string_view {} : name.substr(pos + 1); + auto ext = pup::path::extension(name); + if (!ext.empty() && ext[0] == '.') { + ext.remove_prefix(1); + } + return ext; } } // namespace @@ -854,8 +884,8 @@ auto expand_instruction( Graph const& graph, NodeId cmd_id, PathCache& cache, - std::filesystem::path const& source_root, - std::filesystem::path const& config_root + std::string const& source_root, + std::string const& config_root ) -> std::string { auto const* cmd = get_command_node(graph, cmd_id); @@ -864,23 +894,31 @@ auto expand_instruction( } auto source_dir = graph.strings.get(cmd->source_dir); auto source_to_root = pup::compute_source_to_root(source_dir); - auto canonical_cwd = source_root.empty() - ? 
std::filesystem::path {} - : std::filesystem::weakly_canonical(source_root / std::string { source_dir }); + auto canonical_cwd = std::string {}; + if (!source_root.empty()) { + auto r = pup::platform::canonical(pup::path::join(source_root, std::string { source_dir })); + if (r) { + canonical_cwd = *r; + } + } return expand_instruction_impl(graph, cmd_id, cache, [&](NodeId id) -> std::string { auto full = get_full_path(graph, id, cache); if (!canonical_cwd.empty() && full.starts_with("..")) { - auto abs = std::filesystem::weakly_canonical(source_root / full); - return abs.lexically_relative(canonical_cwd).generic_string(); + auto joined = pup::path::join(source_root, full); + auto abs = pup::platform::canonical(joined); + if (abs) { + return pup::path::relative(*abs, canonical_cwd); + } + return pup::path::relative(pup::path::normalize(joined), canonical_cwd); } - // In 3-tree builds, files may live in config_root rather than source_root. - // Check config_root and compute a canonical relative path from source CWD. 
if (!config_root.empty() && config_root != source_root - && !std::filesystem::exists(source_root / full) - && std::filesystem::exists(config_root / full)) { - auto canonical_config = std::filesystem::weakly_canonical(config_root / full); - return canonical_config.lexically_relative(canonical_cwd).generic_string(); + && !pup::platform::exists(pup::path::join(source_root, full)) + && pup::platform::exists(pup::path::join(config_root, full))) { + auto r = pup::platform::canonical(pup::path::join(config_root, full)); + if (r) { + return pup::path::relative(*r, canonical_cwd); + } } return pup::make_source_relative(full, source_to_root, source_dir); }); @@ -894,7 +932,8 @@ auto expand_instruction(Graph const& graph, NodeId cmd_id) -> std::string auto build_command_index(Graph& graph, PathCache& cache) -> void { - graph.command_str_index.clear(); + graph.command_strings.clear(); + graph.command_index.clear(); graph.command_index_built = true; auto& metrics = thread_metrics(); for (auto i = std::size_t { 1 }; i < graph.commands.size(); ++i) { @@ -906,7 +945,8 @@ auto build_command_index(Graph& graph, PathCache& cache) -> void auto cmd_str = expand_instruction(graph, id, cache); ++metrics.command_expansions; if (!cmd_str.empty()) { - graph.command_str_index[std::move(cmd_str)] = id; + auto str_id = graph.command_strings.intern(cmd_str); + graph.command_index.insert(to_underlying(str_id), id); } } } diff --git a/src/graph/rule_pattern.cpp b/src/graph/rule_pattern.cpp index 1e41fb2..3b4da08 100644 --- a/src/graph/rule_pattern.cpp +++ b/src/graph/rule_pattern.cpp @@ -19,7 +19,7 @@ auto RulePatternRegistry::match_and_generate(CommandInfo const& cmd) const auto result = std::vector {}; for (auto const& pattern : patterns_) { - if (!std::regex_search(cmd.command, pattern.command_pattern)) { + if (!pattern.matches(cmd.command)) { continue; } @@ -34,7 +34,7 @@ auto RulePatternRegistry::match_and_generate(CommandInfo const& cmd) const auto make_gcc_depfile_pattern() -> RulePattern 
{ return RulePattern { - .command_pattern = std::regex { R"((gcc|g\+\+|clang|clang\+\+|cc|c\+\+).*\s-c\s)" }, + .matches = scanners::matches_gcc_compile, .generate = [](CommandInfo const& cmd) -> std::optional { static auto const scanner = scanners::GccScanner {}; diff --git a/src/graph/scanners/gcc.cpp b/src/graph/scanners/gcc.cpp index 5928578..985fc64 100644 --- a/src/graph/scanners/gcc.cpp +++ b/src/graph/scanners/gcc.cpp @@ -8,8 +8,6 @@ #include #include #include -#include -#include namespace pup::graph::scanners { @@ -190,20 +188,45 @@ auto is_source_file(std::string const& word) -> bool || ext == ".S" || ext == ".s" || ext == ".asm"; } -/// Regex to match GCC/Clang compile commands -/// Requires compiler name followed by whitespace (not /) to avoid matching -/// directory names like "build-gcc/gcc/genpreds" -auto gcc_pattern() -> std::regex const& +} // namespace + +auto matches_gcc_compile(std::string_view command) -> bool { - static auto const pattern = std::regex { R"((?:^|/|\s)(gcc|g\+\+|clang|clang\+\+|cc|c\+\+)\s(?:.*\s)?-c(?:\s|$))" }; - return pattern; -} + auto words = core::tokenize_shell_command(command); + if (words.empty()) { + return false; + } -} // namespace + auto compiler_idx = std::size_t { 0 }; + auto first_basename = std::string_view { words[0] }; + if (auto pos = first_basename.rfind('/'); pos != std::string_view::npos) { + first_basename = first_basename.substr(pos + 1); + } + + if (is_compiler_wrapper(std::string { first_basename }) && words.size() > 1) { + compiler_idx = 1; + } + + auto compiler_basename = std::string_view { words[compiler_idx] }; + if (auto pos = compiler_basename.rfind('/'); pos != std::string_view::npos) { + compiler_basename = compiler_basename.substr(pos + 1); + } + + if (!is_compiler_name(compiler_basename)) { + return false; + } + + for (auto i = compiler_idx + 1; i < words.size(); ++i) { + if (words[i] == "-c") { + return true; + } + } + return false; +} auto GccScanner::matches(CommandInfo const& cmd) const 
-> bool { - return std::regex_search(cmd.command, gcc_pattern()); + return matches_gcc_compile(cmd.command); } auto GccScanner::has_dep_flags(std::string const& cmd) const -> bool @@ -253,22 +276,23 @@ auto GccScanner::build_dep_command(CommandInfo const& cmd) const -> std::optiona return std::nullopt; } - auto dep_cmd = std::ostringstream {}; + auto dep_cmd = std::string {}; for (auto i = std::size_t { 0 }; i <= compiler_idx; ++i) { if (i > 0) { - dep_cmd << ' '; + dep_cmd += ' '; } - dep_cmd << words[i]; + dep_cmd += words[i]; } - dep_cmd << " -M"; + dep_cmd += " -M"; auto skip_next = false; auto source_files = std::vector {}; for (auto i = compiler_idx + 1; i < words.size(); ++i) { if (skip_next) { - dep_cmd << ' ' << shell_quote(normalize_path_lexically(words[i])); + dep_cmd += ' '; + dep_cmd += shell_quote(normalize_path_lexically(words[i])); skip_next = false; continue; } @@ -285,7 +309,8 @@ auto GccScanner::build_dep_command(CommandInfo const& cmd) const -> std::optiona } if (is_dep_relevant_flag(w)) { - dep_cmd << ' ' << shell_quote(normalize_flag_path(w)); + dep_cmd += ' '; + dep_cmd += shell_quote(normalize_flag_path(w)); if (w == "-I" || w == "-D" || w == "-U" || w == "-include" || w == "-isystem" || w == "-iquote" || w == "-isysroot") { skip_next = true; @@ -299,10 +324,11 @@ auto GccScanner::build_dep_command(CommandInfo const& cmd) const -> std::optiona } for (auto const& src : source_files) { - dep_cmd << ' ' << shell_quote(src); + dep_cmd += ' '; + dep_cmd += shell_quote(src); } - return dep_cmd.str(); + return dep_cmd; } auto GccScanner::dep_spec() const -> DepSpec diff --git a/src/graph/topo.cpp b/src/graph/topo.cpp index 4219e43..d4083a0 100644 --- a/src/graph/topo.cpp +++ b/src/graph/topo.cpp @@ -3,28 +3,28 @@ #include "pup/graph/topo.hpp" +#include "pup/core/node_id_map.hpp" + #include #include -#include -#include namespace pup::graph { namespace { -enum class Color { White, - Gray, - Black }; +// Values match NodeIdMap32::get() default (0 = 
unvisited) +constexpr auto WHITE = std::uint32_t { 0 }; +constexpr auto GRAY = std::uint32_t { 1 }; +constexpr auto BLACK = std::uint32_t { 2 }; struct DfsState { - std::unordered_map color; - std::unordered_map parent; + NodeIdMap32 color; + NodeIdMap32 parent; std::vector order; std::vector cycle; bool has_cycle = false; }; -// Forward declaration for mutual recursion auto dfs_visit(BuildGraph const& graph, NodeId u, DfsState& state) -> void; auto visit_neighbors( @@ -38,17 +38,17 @@ auto visit_neighbors( if (state.has_cycle) { return; } - if (state.color[v] == Color::White) { - state.parent[v] = u; + if (state.color.get(v) == WHITE) { + state.parent.set(v, u); dfs_visit(graph, v, state); - } else if (state.color[v] == Color::Gray) { + } else if (state.color.get(v) == GRAY) { state.has_cycle = true; state.cycle.clear(); state.cycle.push_back(v); auto curr = u; while (curr != v) { state.cycle.push_back(curr); - curr = state.parent[curr]; + curr = state.parent.get(curr); } state.cycle.push_back(v); std::ranges::reverse(state.cycle); @@ -61,11 +61,11 @@ auto dfs_visit(BuildGraph const& graph, NodeId u, DfsState& state) -> void if (state.has_cycle) { return; } - state.color[u] = Color::Gray; + state.color.set(u, GRAY); visit_neighbors(graph, u, graph.get_outputs(u), state); visit_neighbors(graph, u, graph.get_order_only_dependents(u), state); if (!state.has_cycle) { - state.color[u] = Color::Black; + state.color.set(u, BLACK); state.order.push_back(u); } } @@ -76,14 +76,12 @@ auto topological_sort(BuildGraph const& graph) -> TopoSortResult { auto state = DfsState {}; - // Initialize all nodes as white (unvisited) for (auto id : graph.all_nodes()) { - state.color[id] = Color::White; + state.color.set(id, WHITE); } - // DFS from all unvisited nodes for (auto id : graph.all_nodes()) { - if (state.color[id] == Color::White) { + if (state.color.get(id) == WHITE) { dfs_visit(graph, id, state); } if (state.has_cycle) { @@ -91,7 +89,6 @@ auto topological_sort(BuildGraph 
const& graph) -> TopoSortResult } } - // Reverse for topological order (dependencies first) std::ranges::reverse(state.order); return TopoSortResult { @@ -118,7 +115,7 @@ auto has_path(BuildGraph const& graph, NodeId source, NodeId target) -> bool return true; } - auto visited = std::unordered_set {}; + auto visited = NodeIdMap32 {}; auto stack = std::stack {}; stack.push(source); @@ -135,7 +132,7 @@ auto has_path(BuildGraph const& graph, NodeId source, NodeId target) -> bool continue; } - visited.insert(u); + visited.set(u, 1); for (auto v : graph.get_outputs(u)) { if (!visited.contains(v)) { diff --git a/src/index/entry.cpp b/src/index/entry.cpp index f93b6d5..863220d 100644 --- a/src/index/entry.cpp +++ b/src/index/entry.cpp @@ -5,9 +5,10 @@ #include "pup/core/hash.hpp" #include "pup/core/path_utils.hpp" +#include #include -#include #include +#include #include namespace pup::index { @@ -104,10 +105,15 @@ auto EdgeEntry::from_raw(RawEdge const& raw) -> EdgeEntry auto Index::add_edge(EdgeEntry entry) -> void { - auto const idx = edges_.size(); + assert(edges_.size() < UINT32_MAX); + auto const idx = static_cast(edges_.size()); edges_.push_back(entry); - edges_from_index_[edges_[idx].from].push_back(idx); - edges_to_index_[edges_[idx].to].push_back(idx); + + auto old_from = edges_from_index_.get_slice(edges_[idx].from); + edges_from_index_.set_slice(edges_[idx].from, edge_arena_.append_extend(old_from, idx)); + + auto old_to = edges_to_index_.get_slice(edges_[idx].to); + edges_to_index_.set_slice(edges_[idx].to, edge_arena_.append_extend(old_to, idx)); } auto Index::find_file_by_id(NodeId id) const -> FileEntry const* @@ -140,25 +146,29 @@ auto Index::find_command_by_id(NodeId id) const -> CommandEntry const* auto Index::find_command_by_command(std::string const& cmd) const -> CommandEntry const* { - auto it = command_index_.find(cmd); - if (it != command_index_.end()) { - return &commands_[it->second]; + auto str_id = command_strings_.find(cmd); + if 
(is_empty(str_id)) { + return nullptr; + } + auto const* idx = command_index_.find(to_underlying(str_id)); + if (!idx) { + return nullptr; } - return nullptr; + return &commands_[*idx]; } -auto Index::lookup_edges( - std::unordered_map> const& index, - NodeId id -) const -> std::vector +auto Index::lookup_edges(NodeIdArenaIndex const& index, NodeId id) const + -> std::vector { + auto s = index.get_slice(id); + if (s.length == 0) { + return {}; + } + auto span = edge_arena_.slice(s); auto result = std::vector {}; - auto it = index.find(id); - if (it != index.end()) { - result.reserve(it->second.size()); - for (auto idx : it->second) { - result.push_back(&edges_[idx]); - } + result.reserve(span.size()); + for (auto idx : span) { + result.push_back(&edges_[idx]); } return result; } @@ -175,26 +185,32 @@ auto Index::edges_to(NodeId id) const -> std::vector auto Index::build_edge_indices() -> void { + edge_arena_.clear(); edges_from_index_.clear(); edges_to_index_.clear(); for (auto i = std::size_t { 0 }; i < edges_.size(); ++i) { - edges_from_index_[edges_[i].from].push_back(i); - edges_to_index_[edges_[i].to].push_back(i); + auto const idx = static_cast(i); + auto old_from = edges_from_index_.get_slice(edges_[i].from); + edges_from_index_.set_slice(edges_[i].from, edge_arena_.append_extend(old_from, idx)); + + auto old_to = edges_to_index_.get_slice(edges_[i].to); + edges_to_index_.set_slice(edges_[i].to, edge_arena_.append_extend(old_to, idx)); } - // Rebuild command index using reconstructed command strings rebuild_command_index(); } auto Index::rebuild_command_index() -> void { + command_strings_.clear(); command_index_.clear(); for (auto i = std::size_t { 0 }; i < commands_.size(); ++i) { auto cmd_str = get_command_string(*this, commands_[i]); if (!cmd_str.empty()) { - command_index_[std::move(cmd_str)] = i; + auto str_id = command_strings_.intern(cmd_str); + command_index_.insert(to_underlying(str_id), static_cast(i)); } } } @@ -266,8 +282,11 @@ auto 
Index::clear() -> void files_.clear(); commands_.clear(); edges_.clear(); + edge_arena_.clear(); edges_from_index_.clear(); edges_to_index_.clear(); + command_strings_.clear(); + command_index_.clear(); } namespace { diff --git a/src/index/reader.cpp b/src/index/reader.cpp index 2bb4057..82b6002 100644 --- a/src/index/reader.cpp +++ b/src/index/reader.cpp @@ -5,7 +5,6 @@ #include "pup/core/hash.hpp" #include -#include #include namespace pup::index { @@ -38,7 +37,7 @@ auto read_raw_entries( } // namespace -auto open_index(std::filesystem::path const& path) -> Result +auto open_index(std::string const& path) -> Result { auto result = IndexFile {}; @@ -66,23 +65,17 @@ auto open_index(std::filesystem::path const& path) -> Result return result; } -auto is_valid_index(std::filesystem::path const& path) -> bool +auto is_valid_index(std::string const& path) -> bool { - auto file = std::ifstream { path, std::ios::binary }; - if (!file) { + auto file = pup::platform::MappedFile::open(path); + if (!file || file->size() < sizeof(RawHeader)) { return false; } - auto header = RawHeader {}; - file.read(reinterpret_cast(&header), sizeof(header)); - - if (!file || file.gcount() != static_cast(sizeof(header))) { - return false; - } - - // v8 format only - return std::memcmp(header.magic.data(), INDEX_MAGIC.data(), 4) == 0 - && header.version == INDEX_VERSION; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto const* header = reinterpret_cast(file->data()); + return std::memcmp(header->magic.data(), INDEX_MAGIC.data(), 4) == 0 + && header->version == INDEX_VERSION; } auto read_index(IndexFile const& f) -> Result @@ -135,7 +128,7 @@ auto read_index(IndexFile const& f) -> Result return index; } -auto read_index(std::filesystem::path const& path) -> Result +auto read_index(std::string const& path) -> Result { auto file_result = open_index(path); if (!file_result) { diff --git a/src/index/writer.cpp b/src/index/writer.cpp index dd742da..5ccaae1 100644 --- 
a/src/index/writer.cpp +++ b/src/index/writer.cpp @@ -126,7 +126,7 @@ auto build_header( } // namespace auto write_index( - std::filesystem::path const& path, + std::string const& path, Index const& index ) -> Result { diff --git a/src/parser/config.cpp b/src/parser/config.cpp index a02642e..949d193 100644 --- a/src/parser/config.cpp +++ b/src/parser/config.cpp @@ -2,9 +2,7 @@ // Copyright (c) 2024 Putup authors #include "pup/parser/config.hpp" - -#include -#include +#include "pup/platform/file_io.hpp" namespace pup::parser { @@ -104,17 +102,13 @@ auto parse_config_string(std::string_view content) -> Result return db; } -auto parse_config(std::filesystem::path const& path) -> Result +auto parse_config(std::string const& path) -> Result { - auto file = std::ifstream { path }; - if (!file) { - return make_error(ErrorCode::NotFound, "Cannot open config file: " + path.string()); + auto content = pup::platform::read_file(path); + if (!content) { + return make_error(ErrorCode::NotFound, "Cannot open config file: " + path); } - - auto ss = std::ostringstream {}; - ss << file.rdbuf(); - - return parse_config_string(ss.str()); + return parse_config_string(*content); } } // namespace pup::parser diff --git a/src/parser/depfile.cpp b/src/parser/depfile.cpp index c231536..da0dc9a 100644 --- a/src/parser/depfile.cpp +++ b/src/parser/depfile.cpp @@ -2,9 +2,7 @@ // Copyright (c) 2024 Putup authors #include "pup/parser/depfile.hpp" - -#include -#include +#include "pup/platform/file_io.hpp" namespace pup::parser { @@ -95,17 +93,13 @@ auto parse_path(std::string_view& sv, bool stop_at_colon = false) -> std::string } // anonymous namespace -auto parse_depfile(std::filesystem::path const& path) -> Result +auto parse_depfile(std::string const& path) -> Result { - auto file = std::ifstream { path }; - if (!file) { + auto content = pup::platform::read_file(path); + if (!content) { return make_error(ErrorCode::IoError, "Failed to open depfile"); } - - auto buffer = std::stringstream {}; - 
buffer << file.rdbuf(); - auto content = buffer.str(); - return parse_depfile(std::string_view { content }); + return parse_depfile(std::string_view { *content }); } auto parse_depfile(std::string_view content) -> Result diff --git a/src/parser/eval.cpp b/src/parser/eval.cpp index 6ec11b1..b424bbb 100644 --- a/src/parser/eval.cpp +++ b/src/parser/eval.cpp @@ -4,11 +4,14 @@ #include "pup/parser/eval.hpp" #include "pup/core/platform.hpp" +#include "pup/core/string_id.hpp" +#include "pup/graph/builder.hpp" + +#include "pup/core/path.hpp" #include #include #include -#include namespace pup::parser { @@ -16,51 +19,96 @@ namespace pup::parser { // VarDb // ============================================================================= +auto VarDb::find_entry(std::string_view name) -> Entry* +{ + auto lo = std::size_t { 0 }; + auto hi = entries_.size(); + while (lo < hi) { + auto mid = lo + (hi - lo) / 2; + if (entries_[mid].name < name) { + lo = mid + 1; + } else { + hi = mid; + } + } + if (lo < entries_.size() && entries_[lo].name == name) { + return &entries_[lo]; + } + return nullptr; +} + +auto VarDb::find_entry(std::string_view name) const -> Entry const* +{ + return const_cast(this)->find_entry(name); +} + auto VarDb::set(std::string_view name, std::string value) -> void { - vars_[std::string { name }] = std::move(value); + if (auto* e = find_entry(name)) { + e->value = std::move(value); + return; + } + auto lo = std::size_t { 0 }; + auto hi = entries_.size(); + while (lo < hi) { + auto mid = lo + (hi - lo) / 2; + if (entries_[mid].name < name) { + lo = mid + 1; + } else { + hi = mid; + } + } + entries_.insert(entries_.begin() + static_cast(lo), Entry { std::string { name }, std::move(value) }); } auto VarDb::append(std::string_view name, std::string_view value) -> void { - auto it = vars_.find(name); // Heterogeneous lookup - if (it == vars_.end()) { - vars_[std::string { name }] = std::string { value }; - } else { - if (!it->second.empty()) { - it->second += ' '; - 
} - it->second += value; + if (auto* e = find_entry(name)) { + if (!e->value.empty()) { + e->value += ' '; + } + e->value += value; + return; } + auto lo = std::size_t { 0 }; + auto hi = entries_.size(); + while (lo < hi) { + auto mid = lo + (hi - lo) / 2; + if (entries_[mid].name < name) { + lo = mid + 1; + } else { + hi = mid; + } + } + entries_.insert(entries_.begin() + static_cast(lo), Entry { std::string { name }, std::string { value } }); } auto VarDb::get(std::string_view name) const -> std::string_view { - auto it = vars_.find(name); // Heterogeneous lookup - no temp string - if (it != vars_.end()) { - return it->second; + if (auto const* e = find_entry(name)) { + return e->value; } return {}; } auto VarDb::contains(std::string_view name) const -> bool { - return vars_.contains(name); + return find_entry(name) != nullptr; } auto VarDb::names() const -> std::vector { auto result = std::vector {}; - result.reserve(vars_.size()); - for (auto const& [name, _] : vars_) { - result.push_back(name); + result.reserve(entries_.size()); + for (auto const& e : entries_) { + result.push_back(e.name); } return result; } auto VarDb::clear() -> void { - vars_.clear(); + entries_.clear(); } // ============================================================================= @@ -135,8 +183,11 @@ auto lookup_var_with_bank(VarContext const& ctx, std::string_view name, VarRef:: // Regular variables have priority over config if (ctx.vars && ctx.vars->contains(name)) { // Check if this is an imported env var - if (ctx.imported_vars && ctx.imported_vars->contains(std::string { name })) { - return { ctx.vars->get(name), VarBank::Env }; + if (ctx.imported_vars && ctx.string_pool) { + auto id = ctx.string_pool->find(name); + if (!pup::is_empty(id) && ctx.imported_vars->contains(pup::to_underlying(id))) { + return { ctx.vars->get(name), VarBank::Env }; + } } return { ctx.vars->get(name), VarBank::Regular }; } @@ -176,6 +227,7 @@ auto make_var_context(EvalContext const& ctx) -> VarContext 
.tup_srcdir = ctx.tup_srcdir, .tup_outdir = ctx.tup_outdir, .imported_vars = ctx.imported_vars, + .string_pool = ctx.string_pool, }; } @@ -195,11 +247,14 @@ auto expand_var(EvalContext& ctx, VarRef const& ref) -> Result // Propagate transitive config var dependencies for regular variables if (ref.kind == VarRef::Kind::Regular && bank == VarBank::Regular - && ctx.var_config_deps && ctx.on_config_var_used) { - auto it = ctx.var_config_deps->find(ref.name); - if (it != ctx.var_config_deps->end()) { - for (auto const& dep : it->second) { - ctx.on_config_var_used(dep); + && ctx.var_config_deps && ctx.on_config_var_used && ctx.string_pool) { + auto name_id = ctx.string_pool->find(ref.name); + if (!pup::is_empty(name_id)) { + if (auto const* deps = ctx.var_config_deps->find(name_id)) { + auto const* d = deps->data(); + for (std::size_t i = 0, n = deps->size(); i < n; ++i) { + ctx.on_config_var_used(ctx.string_pool->get(pup::make_string_id(d[i]))); + } } } } @@ -211,11 +266,14 @@ auto expand_var(EvalContext& ctx, VarRef const& ref) -> Result // Propagate transitive env var dependencies for regular variables if (ref.kind == VarRef::Kind::Regular && bank == VarBank::Regular - && ctx.var_env_deps && ctx.on_env_var_used) { - auto it = ctx.var_env_deps->find(ref.name); - if (it != ctx.var_env_deps->end()) { - for (auto const& dep : it->second) { - ctx.on_env_var_used(dep); + && ctx.var_env_deps && ctx.on_env_var_used && ctx.string_pool) { + auto name_id = ctx.string_pool->find(ref.name); + if (!pup::is_empty(name_id)) { + if (auto const* deps = ctx.var_env_deps->find(name_id)) { + auto const* d = deps->data(); + for (std::size_t i = 0, n = deps->size(); i < n; ++i) { + ctx.on_env_var_used(ctx.string_pool->get(pup::make_string_id(d[i]))); + } } } } @@ -509,7 +567,7 @@ auto expand_path( if (end > start) { // Normalize path to remove // and resolve . and .. 
components auto path_str = expanded.substr(start, end - start); - auto normalized = std::filesystem::path { path_str }.lexically_normal().generic_string(); + auto normalized = pup::path::normalize(path_str); result.push_back(std::move(normalized)); } start = end; diff --git a/src/parser/glob.cpp b/src/parser/glob.cpp index 576caf4..334857d 100644 --- a/src/parser/glob.cpp +++ b/src/parser/glob.cpp @@ -2,6 +2,8 @@ // Copyright (c) 2024 Putup authors #include "pup/parser/glob.hpp" +#include "pup/core/path.hpp" +#include "pup/platform/file_io.hpp" #include #include @@ -189,87 +191,66 @@ auto Glob::match_bracket(std::string_view& pattern, char c) const -> bool auto glob_expand( std::string_view pattern, - std::filesystem::path const& base_dir, + std::string const& base_dir, GlobOptions const& options ) -> Result> { - namespace fs = std::filesystem; - auto results = std::vector {}; - // Check if pattern has wildcards if (!has_glob_chars(pattern)) { - // Literal path - just check if it exists - auto path = fs::path { base_dir / pattern }; - if (fs::exists(path)) { + auto path = base_dir + "/" + std::string { pattern }; + if (pup::platform::exists(path)) { results.emplace_back(pattern); } return results; } - // Split into directory and pattern parts auto [dir_part, file_pattern] = glob_split_path(pattern); - auto search_dir = fs::path { dir_part.empty() ? base_dir : base_dir / dir_part }; + auto search_dir = dir_part.empty() ? 
base_dir : base_dir + "/" + std::string { dir_part }; - if (!fs::exists(search_dir) || !fs::is_directory(search_dir)) { + if (!pup::platform::exists(search_dir) || !pup::platform::is_directory(search_dir)) { return results; } auto glob = Glob { file_pattern }; - // Check if we need recursive search auto const is_recursive = glob.is_recursive() && options.recursive; - auto iterate = [&](auto const& entry) { - auto const& path = entry.path(); - auto filename = path.filename().string(); - - // Skip hidden files unless requested - if (!options.include_hidden && !filename.empty() && filename[0] == '.') { - return; - } - - // For recursive, match against relative path - if (is_recursive) { - auto rel = fs::relative(path, search_dir); - if (glob.matches(rel.generic_string())) { - auto result_path = dir_part.empty() ? rel.generic_string() : std::string { dir_part } + "/" + rel.generic_string(); - results.push_back(result_path); + if (is_recursive) { + (void)pup::platform::walk_directory(search_dir, [&](pup::platform::DirEntry const& entry, std::string const& rel_path) -> bool { + auto name = entry.name; + if (!options.include_hidden && !name.empty() && name[0] == '.') { + return false; } - } else { - // Match just the filename - if (glob.matches(filename)) { - auto result_path = dir_part.empty() ? filename : std::string { dir_part } + "/" + filename; + if (glob.matches(rel_path)) { + auto result_path = dir_part.empty() ? 
rel_path : std::string { dir_part } + "/" + rel_path; results.push_back(result_path); } - } - }; - - auto ec = std::error_code {}; - if (is_recursive) { - for (auto const& entry : fs::recursive_directory_iterator(search_dir, ec)) { - if (ec) { - break; - } - iterate(entry); - } + return true; + }); } else { - for (auto const& entry : fs::directory_iterator(search_dir, ec)) { - if (ec) { - break; + auto entries = pup::platform::read_directory(search_dir); + if (entries) { + for (auto const& entry : *entries) { + auto const& name = entry.name; + if (!options.include_hidden && !name.empty() && name[0] == '.') { + continue; + } + if (glob.matches(name)) { + auto result_path = dir_part.empty() ? name : std::string { dir_part } + "/" + name; + results.push_back(result_path); + } } - iterate(entry); } } - // Sort for consistent output std::ranges::sort(results); return results; } auto glob_expand_all( std::vector const& patterns, - std::filesystem::path const& base_dir, + std::string const& base_dir, GlobOptions const& options ) -> Result { @@ -355,40 +336,26 @@ auto has_glob_chars(std::string_view pattern) -> bool auto path_basename(std::string_view path) -> std::string_view { - auto last_slash = path.rfind('/'); - if (last_slash == std::string_view::npos) { - return path; - } - return path.substr(last_slash + 1); + return pup::path::filename(path); } auto path_stem(std::string_view path) -> std::string_view { - auto base = path_basename(path); - auto dot = base.rfind('.'); - if (dot == std::string_view::npos || dot == 0) { - return base; - } - return base.substr(0, dot); + return pup::path::stem(path); } auto path_extension(std::string_view path) -> std::string_view { - auto base = path_basename(path); - auto dot = base.rfind('.'); - if (dot == std::string_view::npos || dot == 0) { - return {}; + auto ext = pup::path::extension(path); + if (!ext.empty() && ext[0] == '.') { + ext.remove_prefix(1); } - return base.substr(dot + 1); + return ext; } auto 
path_directory(std::string_view path) -> std::string_view { - auto last_slash = path.rfind('/'); - if (last_slash == std::string_view::npos) { - return {}; - } - return path.substr(0, last_slash); + return pup::path::parent(path); } auto glob_match_extract(std::string_view pattern, std::string_view filename) -> std::string diff --git a/src/parser/ignore.cpp b/src/parser/ignore.cpp index e17c6a0..dd97d90 100644 --- a/src/parser/ignore.cpp +++ b/src/parser/ignore.cpp @@ -2,8 +2,7 @@ // Copyright (c) 2024 Putup authors #include "pup/parser/ignore.hpp" - -#include +#include "pup/platform/file_io.hpp" namespace pup::parser { @@ -11,36 +10,41 @@ namespace pup::parser { // IgnoreList // ============================================================================= -auto IgnoreList::load(std::filesystem::path const& path) -> Result +auto IgnoreList::load(std::string const& path) -> Result { - auto file = std::ifstream { path }; - if (!file) { - return make_error(ErrorCode::IoError, "Failed to open ignore file: " + path.string()); + auto content = pup::platform::read_file(path); + if (!content) { + return make_error(ErrorCode::IoError, "Failed to open ignore file: " + path); } auto list = IgnoreList::with_defaults(); - auto line = std::string {}; + auto sv = std::string_view { *content }; - while (std::getline(file, line)) { - // Skip empty lines and comments - if (line.empty()) { - continue; + while (!sv.empty()) { + auto nl = sv.find('\n'); + auto raw = (nl == std::string_view::npos) ? sv : sv.substr(0, nl); + sv = (nl == std::string_view::npos) ? 
std::string_view {} : sv.substr(nl + 1); + + // Strip \r + if (!raw.empty() && raw.back() == '\r') { + raw.remove_suffix(1); } - // Trim leading whitespace - auto start = line.find_first_not_of(" \t"); - if (start == std::string::npos) { + if (raw.empty()) { continue; } - line = line.substr(start); + auto start = raw.find_first_not_of(" \t"); + if (start == std::string_view::npos) { + continue; + } + raw = raw.substr(start); - // Skip comments - if (line[0] == '#') { + if (raw[0] == '#') { continue; } - // Trim trailing whitespace (but preserve escaped spaces) + auto line = std::string { raw }; while (!line.empty() && (line.back() == ' ' || line.back() == '\t')) { if (line.size() >= 2 && line[line.size() - 2] == '\\') { break; @@ -111,7 +115,7 @@ auto IgnoreList::parse_pattern(std::string_view line) -> std::optional bool +auto IgnoreList::is_ignored(std::string const& rel_path) const -> bool { auto ignored = false; @@ -125,24 +129,18 @@ auto IgnoreList::is_ignored(std::filesystem::path const& rel_path) const -> bool return ignored; } -auto IgnoreList::match_pattern(IgnorePattern const& p, std::filesystem::path const& path) const -> bool +auto IgnoreList::match_pattern(IgnorePattern const& p, std::string const& path_str) const -> bool { - auto path_str = path.generic_string(); - - // For anchored patterns, match against full path from root - // For unanchored patterns, match against any path component if (p.anchored) { - // Anchored: must match from start return glob_match(p.pattern, path_str); } - // Unanchored: try matching against basename first - auto basename = path.filename().string(); + auto slash_pos = path_str.rfind('/'); + auto basename = (slash_pos == std::string::npos) ? 
path_str : path_str.substr(slash_pos + 1); if (glob_match(p.pattern, basename)) { return true; } - // Also try matching against full path for patterns like **/foo if (p.pattern.starts_with("**")) { return glob_match(p.pattern, path_str); } diff --git a/src/platform/file_io-posix.cpp b/src/platform/file_io-posix.cpp index 4ac4227..b127cfd 100644 --- a/src/platform/file_io-posix.cpp +++ b/src/platform/file_io-posix.cpp @@ -1,11 +1,15 @@ // SPDX-License-Identifier: MIT // Copyright (c) 2024 Putup authors +#include "pup/core/path.hpp" #include "pup/platform/file_io.hpp" +#include +#include #include +#include +#include #include -#include #include #include #include @@ -51,7 +55,7 @@ auto MappedFile::is_open() const -> bool return impl_ && impl_->data != nullptr; } -auto MappedFile::open(std::filesystem::path const& path) -> Result +auto MappedFile::open(std::string const& path) -> Result { auto file = MappedFile {}; file.impl_ = std::make_unique(); @@ -103,7 +107,7 @@ auto MappedFile::close() -> void impl_.reset(); } -auto stat_file(std::filesystem::path const& path) -> Result +auto stat_file(std::string const& path) -> Result { struct stat st { }; if (::stat(path.c_str(), &st) < 0) { @@ -125,28 +129,27 @@ auto stat_file(std::filesystem::path const& path) -> Result } auto atomic_write( - std::filesystem::path const& path, + std::string const& path, std::span data ) -> Result { - auto parent = path.parent_path(); - if (!parent.empty()) { - auto ec = std::error_code {}; - std::filesystem::create_directories(parent, ec); - if (ec) { - return make_error(ErrorCode::IoError, "Failed to create directory"); + auto par = std::string { pup::path::parent(path) }; + if (!par.empty()) { + auto r = create_directories(par); + if (!r) { + return r; } } - auto temp_path = std::filesystem::path { path }; - temp_path += ".tmp."; + auto temp_path = path + ".tmp."; - auto rd = std::random_device {}; - auto gen = std::mt19937 { std::random_device::result_type { rd() } }; - auto dist = 
std::uniform_int_distribution<> { 0, 15 }; + struct timespec ts { }; + clock_gettime(CLOCK_MONOTONIC, &ts); + auto seed = static_cast(getpid()) ^ static_cast(ts.tv_nsec); auto const* const hex = "0123456789abcdef"; for (auto i = 0; i < 8; ++i) { - temp_path += hex[dist(gen)]; + seed = seed * 1103515245u + 12345u; + temp_path += hex[(seed >> 16) & 0xF]; } // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) @@ -177,4 +180,426 @@ auto atomic_write( return {}; } +// Filesystem queries + +auto exists(std::string const& path) -> bool +{ + struct stat st { }; + return ::stat(path.c_str(), &st) == 0; +} + +auto is_file(std::string const& path) -> bool +{ + struct stat st { }; + if (::stat(path.c_str(), &st) != 0) { + return false; + } + return S_ISREG(st.st_mode); +} + +auto is_directory(std::string const& path) -> bool +{ + struct stat st { }; + if (::stat(path.c_str(), &st) != 0) { + return false; + } + return S_ISDIR(st.st_mode); +} + +auto is_symlink(std::string const& path) -> bool +{ + struct stat st { }; + if (::lstat(path.c_str(), &st) != 0) { + return false; + } + return S_ISLNK(st.st_mode); +} + +auto is_empty(std::string const& path) -> bool +{ + struct stat st { }; + if (::stat(path.c_str(), &st) != 0) { + return true; + } + if (S_ISDIR(st.st_mode)) { + auto* dir = ::opendir(path.c_str()); + if (!dir) { + return true; + } + struct dirent* entry = nullptr; + while ((entry = ::readdir(dir)) != nullptr) { + if (std::strcmp(entry->d_name, ".") != 0 && std::strcmp(entry->d_name, "..") != 0) { + ::closedir(dir); + return false; + } + } + ::closedir(dir); + return true; + } + return st.st_size == 0; +} + +// Filesystem mutations + +namespace { + +auto mkdir_recursive(std::string const& path) -> bool +{ + if (path.empty()) { + return true; + } + + struct stat st { }; + if (::stat(path.c_str(), &st) == 0) { + return S_ISDIR(st.st_mode); + } + + auto par = std::string { pup::path::parent(path) }; + if (!par.empty() && par != path) { + if (!mkdir_recursive(par)) { + 
return false; + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + return ::mkdir(path.c_str(), 0755) == 0 || errno == EEXIST; +} + +} // namespace + +auto create_directories(std::string const& path) -> Result +{ + if (path.empty()) { + return {}; + } + if (!mkdir_recursive(path)) { + return make_error(ErrorCode::IoError, "Failed to create directories: " + path); + } + return {}; +} + +auto remove_file(std::string const& path) -> Result +{ + struct stat st { }; + if (::lstat(path.c_str(), &st) != 0) { + if (errno == ENOENT) { + return {}; + } + return make_error(ErrorCode::IoError, "Failed to stat: " + path); + } + if (S_ISDIR(st.st_mode)) { + if (::rmdir(path.c_str()) != 0) { + return make_error(ErrorCode::IoError, "Failed to remove directory: " + path); + } + } else { + if (::unlink(path.c_str()) != 0) { + return make_error(ErrorCode::IoError, "Failed to remove file: " + path); + } + } + return {}; +} + +namespace { + +auto remove_all_recursive(std::string const& path) -> bool +{ + struct stat st { }; + if (::lstat(path.c_str(), &st) != 0) { + return errno == ENOENT; + } + + if (!S_ISDIR(st.st_mode)) { + return ::unlink(path.c_str()) == 0; + } + + auto* dir = ::opendir(path.c_str()); + if (!dir) { + return false; + } + + auto ok = true; + struct dirent* entry = nullptr; + while ((entry = ::readdir(dir)) != nullptr) { + if (std::strcmp(entry->d_name, ".") == 0 || std::strcmp(entry->d_name, "..") == 0) { + continue; + } + auto child = path + "/" + entry->d_name; + if (!remove_all_recursive(child)) { + ok = false; + } + } + ::closedir(dir); + + if (ok) { + ok = (::rmdir(path.c_str()) == 0); + } + return ok; +} + +} // namespace + +auto remove_all(std::string const& path) -> Result +{ + if (!remove_all_recursive(path)) { + return make_error(ErrorCode::IoError, "Failed to remove: " + path); + } + return {}; +} + +auto rename_path(std::string const& from, std::string const& to) -> Result +{ + if (::rename(from.c_str(), to.c_str()) != 0) { + return 
make_error(ErrorCode::IoError, "Failed to rename: " + from + " -> " + to); + } + return {}; +} + +auto copy_file(std::string const& from, std::string const& to) -> Result +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + auto src_fd = ::open(from.c_str(), O_RDONLY); + if (src_fd < 0) { + return make_error(ErrorCode::IoError, "Failed to open source: " + from); + } + + struct stat st { }; + ::fstat(src_fd, &st); + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + auto dst_fd = ::open(to.c_str(), O_WRONLY | O_CREAT | O_TRUNC, st.st_mode & 0777); + if (dst_fd < 0) { + ::close(src_fd); + return make_error(ErrorCode::IoError, "Failed to create destination: " + to); + } + + char buf[8192]; + auto ok = true; + while (true) { + auto n = ::read(src_fd, buf, sizeof(buf)); + if (n < 0) { + if (errno == EINTR) { + continue; + } + ok = false; + break; + } + if (n == 0) { + break; + } + if (::write(dst_fd, buf, static_cast(n)) != n) { + ok = false; + break; + } + } + + ::close(src_fd); + ::close(dst_fd); + + if (!ok) { + ::unlink(to.c_str()); + return make_error(ErrorCode::IoError, "Failed to copy: " + from + " -> " + to); + } + return {}; +} + +// Path resolution + +auto current_directory() -> Result +{ + char buf[4096]; + if (::getcwd(buf, sizeof(buf)) == nullptr) { + return make_error(ErrorCode::IoError, "Failed to get current directory"); + } + return std::string { buf }; +} + +auto canonical(std::string const& path) -> Result +{ + char* resolved = ::realpath(path.c_str(), nullptr); + if (resolved) { + auto result = std::string { resolved }; + ::free(resolved); // NOLINT(cppcoreguidelines-no-malloc) + return result; + } + + auto abs = absolute(path); + if (!abs) { + return abs; + } + auto p = *abs; + auto existing = p; + auto tail = std::string {}; + while (!existing.empty()) { + char* r = ::realpath(existing.c_str(), nullptr); + if (r) { + auto result = std::string { r }; + ::free(r); // NOLINT(cppcoreguidelines-no-malloc) + if (!tail.empty()) { + result = 
pup::path::join(result, pup::path::normalize(tail)); + } + return result; + } + auto par = std::string { pup::path::parent(existing) }; + auto name = std::string { pup::path::filename(existing) }; + tail = tail.empty() ? name : pup::path::join(name, tail); + if (par == existing) { + break; + } + existing = par; + } + return pup::path::normalize(p); +} + +auto absolute(std::string const& path) -> Result +{ + if (!path.empty() && path[0] == '/') { + return path; + } + auto cwd = current_directory(); + if (!cwd) { + return cwd; + } + return pup::path::join(*cwd, path); +} + +auto read_symlink(std::string const& path) -> Result +{ + char buf[4096]; + auto n = ::readlink(path.c_str(), buf, sizeof(buf) - 1); + if (n < 0) { + return make_error(ErrorCode::IoError, "Failed to read symlink: " + path); + } + buf[n] = '\0'; + return std::string { buf }; +} + +// File I/O + +auto read_file(std::string const& path) -> Result +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + auto fd = ::open(path.c_str(), O_RDONLY); + if (fd < 0) { + return make_error(ErrorCode::IoError, "Failed to open file: " + path); + } + + struct stat st { }; + ::fstat(fd, &st); + auto size = static_cast(st.st_size); + + auto content = std::string(size, '\0'); + auto total = std::size_t { 0 }; + while (total < size) { + auto n = ::read(fd, content.data() + total, size - total); + if (n < 0) { + if (errno == EINTR) { + continue; + } + break; + } + if (n == 0) { + break; + } + total += static_cast(n); + } + ::close(fd); + + content.resize(total); + return content; +} + +auto write_file(std::string const& path, std::string_view data) -> Result +{ + auto par = std::string { pup::path::parent(path) }; + if (!par.empty()) { + auto r = create_directories(par); + if (!r) { + return r; + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + auto fd = ::open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) { + return make_error(ErrorCode::IoError, "Failed to open file for writing: 
" + path); + } + + auto n = ::write(fd, data.data(), data.size()); + ::close(fd); + + if (n != static_cast(data.size())) { + return make_error(ErrorCode::IoError, "Failed to write file: " + path); + } + return {}; +} + +// Directory traversal + +auto read_directory(std::string const& path) -> Result> +{ + auto* dir = ::opendir(path.c_str()); + if (!dir) { + return make_error>(ErrorCode::IoError, "Failed to open directory: " + path); + } + + auto entries = std::vector {}; + struct dirent* entry = nullptr; + while ((entry = ::readdir(dir)) != nullptr) { + if (std::strcmp(entry->d_name, ".") == 0 || std::strcmp(entry->d_name, "..") == 0) { + continue; + } + auto is_dir = false; +#ifdef _DIRENT_HAVE_D_TYPE + if (entry->d_type == DT_DIR) { + is_dir = true; + } else if (entry->d_type == DT_UNKNOWN) { +#endif + struct stat st { }; + auto full = path + "/" + entry->d_name; + if (::stat(full.c_str(), &st) == 0) { + is_dir = S_ISDIR(st.st_mode); + } +#ifdef _DIRENT_HAVE_D_TYPE + } +#endif + entries.push_back(DirEntry { entry->d_name, is_dir }); + } + ::closedir(dir); + return entries; +} + +namespace { + +auto walk_recursive( + std::string const& base, + std::string const& rel, + WalkVisitor const& visitor +) -> Result +{ + auto full = rel.empty() ? base : base + "/" + rel; + auto entries = read_directory(full); + if (!entries) { + return pup::unexpected(entries.error()); + } + + for (auto const& e : *entries) { + auto child_rel = rel.empty() ? 
e.name : rel + "/" + e.name; + auto should_recurse = visitor(e, child_rel); + if (e.is_dir && should_recurse) { + auto r = walk_recursive(base, child_rel, visitor); + if (!r) { + return r; + } + } + } + return {}; +} + +} // namespace + +auto walk_directory(std::string const& path, WalkVisitor const& visitor) -> Result +{ + return walk_recursive(path, "", visitor); +} + } // namespace pup::platform diff --git a/src/platform/file_io-win32.cpp b/src/platform/file_io-win32.cpp index 165ddb3..8ce8c87 100644 --- a/src/platform/file_io-win32.cpp +++ b/src/platform/file_io-win32.cpp @@ -1,14 +1,45 @@ // SPDX-License-Identifier: MIT // Copyright (c) 2024 Putup authors +#include "pup/core/path.hpp" #include "pup/platform/file_io.hpp" -#include - #include namespace pup::platform { +namespace { + +auto to_wide(std::string const& s) -> std::wstring +{ + if (s.empty()) { + return {}; + } + auto len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), static_cast(s.size()), nullptr, 0); + if (len == 0) { + len = MultiByteToWideChar(CP_UTF8, 0, s.data(), static_cast(s.size()), nullptr, 0); + } + auto result = std::wstring(static_cast(len), L'\0'); + MultiByteToWideChar(CP_UTF8, 0, s.data(), static_cast(s.size()), result.data(), len); + return result; +} + +auto from_wide(std::wstring const& w) -> std::string +{ + if (w.empty()) { + return {}; + } + auto len = WideCharToMultiByte(CP_UTF8, 0, w.data(), static_cast(w.size()), nullptr, 0, nullptr, nullptr); + if (len == 0) { + return {}; + } + auto result = std::string(static_cast(len), '\0'); + WideCharToMultiByte(CP_UTF8, 0, w.data(), static_cast(w.size()), result.data(), len, nullptr, nullptr); + return result; +} + +} // namespace + struct MappedFile::Impl { std::byte* data = nullptr; std::size_t size = 0; @@ -49,13 +80,14 @@ auto MappedFile::is_open() const -> bool return impl_ && impl_->data != nullptr; } -auto MappedFile::open(std::filesystem::path const& path) -> Result +auto MappedFile::open(std::string const& 
path) -> Result { auto file = MappedFile {}; file.impl_ = std::make_unique(); + auto wpath = to_wide(path); file.impl_->file_handle = CreateFileW( - path.c_str(), + wpath.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, @@ -127,10 +159,11 @@ auto MappedFile::close() -> void impl_.reset(); } -auto stat_file(std::filesystem::path const& path) -> Result +auto stat_file(std::string const& path) -> Result { + auto wpath = to_wide(path); auto attrs = WIN32_FILE_ATTRIBUTE_DATA {}; - if (!GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &attrs)) { + if (!GetFileAttributesExW(wpath.c_str(), GetFileExInfoStandard, &attrs)) { return make_error(ErrorCode::IoError, "Failed to stat file"); } @@ -138,9 +171,6 @@ auto stat_file(std::filesystem::path const& path) -> Result file_size.LowPart = attrs.nFileSizeLow; file_size.HighPart = attrs.nFileSizeHigh; - // Convert FILETIME to nanoseconds since Unix epoch - // FILETIME is 100-nanosecond intervals since Jan 1, 1601 - // Unix epoch is Jan 1, 1970 - difference is 116444736000000000 100-ns intervals auto mtime = ULARGE_INTEGER {}; mtime.LowPart = attrs.ftLastWriteTime.dwLowDateTime; mtime.HighPart = attrs.ftLastWriteTime.dwHighDateTime; @@ -154,34 +184,30 @@ auto stat_file(std::filesystem::path const& path) -> Result } auto atomic_write( - std::filesystem::path const& path, + std::string const& path, std::span data ) -> Result { - auto parent = path.parent_path(); - if (!parent.empty()) { - auto ec = std::error_code {}; - std::filesystem::create_directories(parent, ec); - if (ec) { - return make_error(ErrorCode::IoError, "Failed to create directory"); + auto par = std::string { pup::path::parent(path) }; + if (!par.empty()) { + auto r = create_directories(par); + if (!r) { + return r; } } - auto temp_path = path; - temp_path += L".tmp."; + auto temp_path = path + ".tmp."; - auto rd = std::random_device {}; - auto gen = std::mt19937 { std::random_device::result_type { rd() } }; - auto dist = std::uniform_int_distribution<> { 
0, 15 }; - WCHAR temp_suffix[16]; - for (int i = 0; i < 8; ++i) { - temp_suffix[i] = L"0123456789abcdef"[dist(gen)]; + auto seed = static_cast(GetCurrentProcessId()) ^ static_cast(GetTickCount()); + auto const* const hex = "0123456789abcdef"; + for (auto i = 0; i < 8; ++i) { + seed = seed * 1103515245u + 12345u; + temp_path += hex[(seed >> 16) & 0xF]; } - temp_suffix[8] = L'\0'; - temp_path += temp_suffix; + auto wtemp = to_wide(temp_path); auto file = CreateFileW( - temp_path.c_str(), + wtemp.c_str(), GENERIC_WRITE, 0, nullptr, @@ -200,16 +226,399 @@ auto atomic_write( CloseHandle(file); if (!write_ok || bytes_written != data.size() || !flush_ok) { - DeleteFileW(temp_path.c_str()); + DeleteFileW(wtemp.c_str()); return make_error(ErrorCode::IoError, "Failed to write file"); } - if (!MoveFileExW(temp_path.c_str(), path.c_str(), MOVEFILE_REPLACE_EXISTING)) { - DeleteFileW(temp_path.c_str()); + auto wpath = to_wide(path); + if (!MoveFileExW(wtemp.c_str(), wpath.c_str(), MOVEFILE_REPLACE_EXISTING)) { + DeleteFileW(wtemp.c_str()); return make_error(ErrorCode::IoError, "Failed to rename file"); } return {}; } +// Filesystem queries + +auto exists(std::string const& path) -> bool +{ + auto wpath = to_wide(path); + return GetFileAttributesW(wpath.c_str()) != INVALID_FILE_ATTRIBUTES; +} + +auto is_file(std::string const& path) -> bool +{ + auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return false; + } + return (attrs & FILE_ATTRIBUTE_DIRECTORY) == 0; +} + +auto is_directory(std::string const& path) -> bool +{ + auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return false; + } + return (attrs & FILE_ATTRIBUTE_DIRECTORY) != 0; +} + +auto is_symlink(std::string const& path) -> bool +{ + auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return false; + } + 
return (attrs & FILE_ATTRIBUTE_REPARSE_POINT) != 0; +} + +auto is_empty(std::string const& path) -> bool +{ + auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return true; + } + if (attrs & FILE_ATTRIBUTE_DIRECTORY) { + auto search = wpath + L"\\*"; + auto fd = WIN32_FIND_DATAW {}; + auto h = FindFirstFileW(search.c_str(), &fd); + if (h == INVALID_HANDLE_VALUE) { + return true; + } + auto empty = true; + do { + if (wcscmp(fd.cFileName, L".") != 0 && wcscmp(fd.cFileName, L"..") != 0) { + empty = false; + break; + } + } while (FindNextFileW(h, &fd)); + FindClose(h); + return empty; + } + auto file_data = WIN32_FILE_ATTRIBUTE_DATA {}; + if (!GetFileAttributesExW(wpath.c_str(), GetFileExInfoStandard, &file_data)) { + return true; + } + return file_data.nFileSizeHigh == 0 && file_data.nFileSizeLow == 0; +} + +// Filesystem mutations + +auto create_directories(std::string const& path) -> Result +{ + if (path.empty()) { + return {}; + } + auto par = std::string { pup::path::parent(path) }; + if (!par.empty() && par != path) { + auto r = create_directories(par); + if (!r) { + return r; + } + } + auto wpath = to_wide(path); + if (!CreateDirectoryW(wpath.c_str(), nullptr)) { + auto err = GetLastError(); + if (err != ERROR_ALREADY_EXISTS) { + return make_error(ErrorCode::IoError, "Failed to create directory: " + path); + } + } + return {}; +} + +auto remove_file(std::string const& path) -> Result +{ + auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return {}; + } + if (attrs & FILE_ATTRIBUTE_DIRECTORY) { + if (!RemoveDirectoryW(wpath.c_str())) { + return make_error(ErrorCode::IoError, "Failed to remove directory: " + path); + } + } else { + if (!DeleteFileW(wpath.c_str())) { + return make_error(ErrorCode::IoError, "Failed to remove file: " + path); + } + } + return {}; +} + +auto remove_all(std::string const& path) -> Result +{ + 
auto wpath = to_wide(path); + auto attrs = GetFileAttributesW(wpath.c_str()); + if (attrs == INVALID_FILE_ATTRIBUTES) { + return {}; + } + if (!(attrs & FILE_ATTRIBUTE_DIRECTORY)) { + if (!DeleteFileW(wpath.c_str())) { + return make_error(ErrorCode::IoError, "Failed to remove file: " + path); + } + return {}; + } + auto entries = read_directory(path); + if (entries) { + for (auto const& e : *entries) { + auto child = path + "/" + e.name; + auto r = remove_all(child); + if (!r) { + return r; + } + } + } + if (!RemoveDirectoryW(wpath.c_str())) { + return make_error(ErrorCode::IoError, "Failed to remove directory: " + path); + } + return {}; +} + +auto rename_path(std::string const& from, std::string const& to) -> Result +{ + auto wfrom = to_wide(from); + auto wto = to_wide(to); + if (!MoveFileExW(wfrom.c_str(), wto.c_str(), MOVEFILE_REPLACE_EXISTING)) { + return make_error(ErrorCode::IoError, "Failed to rename: " + from + " -> " + to); + } + return {}; +} + +auto copy_file(std::string const& from, std::string const& to) -> Result +{ + auto wfrom = to_wide(from); + auto wto = to_wide(to); + if (!CopyFileW(wfrom.c_str(), wto.c_str(), FALSE)) { + return make_error(ErrorCode::IoError, "Failed to copy: " + from + " -> " + to); + } + return {}; +} + +// Path resolution + +auto current_directory() -> Result +{ + auto len = GetCurrentDirectoryW(0, nullptr); + if (len == 0) { + return make_error(ErrorCode::IoError, "Failed to get current directory"); + } + auto buf = std::wstring(len, L'\0'); + GetCurrentDirectoryW(len, buf.data()); + buf.resize(len - 1); + auto result = from_wide(buf); + for (auto& c : result) { + if (c == '\\') { + c = '/'; + } + } + return result; +} + +auto canonical(std::string const& path) -> Result +{ + auto wpath = to_wide(path); + + // Try to open the path to resolve symlinks via GetFinalPathNameByHandleW + auto h = CreateFileW( + wpath.c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, nullptr, OPEN_EXISTING, 
FILE_FLAG_BACKUP_SEMANTICS, nullptr + ); + + if (h != INVALID_HANDLE_VALUE) { + auto len = GetFinalPathNameByHandleW(h, nullptr, 0, FILE_NAME_NORMALIZED); + if (len > 0) { + auto buf = std::wstring(len, L'\0'); + GetFinalPathNameByHandleW(h, buf.data(), len + 1, FILE_NAME_NORMALIZED); + CloseHandle(h); + buf.resize(len - 1); + auto result = from_wide(buf); + // Strip \\?\ prefix + if (result.size() > 4 && result[0] == '\\' && result[1] == '\\' && result[2] == '?' && result[3] == '\\') { + result = result.substr(4); + } + for (auto& c : result) { + if (c == '\\') { + c = '/'; + } + } + return result; + } + CloseHandle(h); + } + + // Fallback for non-existent paths: lexical resolution only + auto len = GetFullPathNameW(wpath.c_str(), 0, nullptr, nullptr); + if (len == 0) { + return make_error(ErrorCode::IoError, "Failed to resolve path: " + path); + } + auto buf = std::wstring(len, L'\0'); + GetFullPathNameW(wpath.c_str(), len, buf.data(), nullptr); + buf.resize(len - 1); + auto result = from_wide(buf); + for (auto& c : result) { + if (c == '\\') { + c = '/'; + } + } + return result; +} + +auto absolute(std::string const& path) -> Result +{ + return canonical(path); +} + +auto read_symlink(std::string const& path) -> Result +{ + auto wpath = to_wide(path); + auto h = CreateFileW( + wpath.c_str(), + 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, + nullptr + ); + if (h == INVALID_HANDLE_VALUE) { + return make_error(ErrorCode::IoError, "Failed to read symlink: " + path); + } + auto len = GetFinalPathNameByHandleW(h, nullptr, 0, FILE_NAME_NORMALIZED); + if (len == 0) { + CloseHandle(h); + return make_error(ErrorCode::IoError, "Failed to read symlink: " + path); + } + auto buf = std::wstring(len, L'\0'); + GetFinalPathNameByHandleW(h, buf.data(), len + 1, FILE_NAME_NORMALIZED); + CloseHandle(h); + buf.resize(len - 1); + auto result = from_wide(buf); + for (auto& c : 
result) { + if (c == '\\') { + c = '/'; + } + } + return result; +} + +// File I/O + +auto read_file(std::string const& path) -> Result +{ + auto wpath = to_wide(path); + auto h = CreateFileW( + wpath.c_str(), + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + nullptr + ); + if (h == INVALID_HANDLE_VALUE) { + return make_error(ErrorCode::IoError, "Failed to open file: " + path); + } + auto file_size = LARGE_INTEGER {}; + if (!GetFileSizeEx(h, &file_size)) { + CloseHandle(h); + return make_error(ErrorCode::IoError, "Failed to get file size: " + path); + } + auto size = static_cast(file_size.QuadPart); + auto content = std::string(size, '\0'); + auto total = std::size_t { 0 }; + while (total < size) { + auto chunk = static_cast(std::min(size - total, std::size_t { 0x7FFF'FFFFu })); + auto bytes_read = DWORD {}; + if (!ReadFile(h, content.data() + total, chunk, &bytes_read, nullptr) || bytes_read == 0) { + break; + } + total += bytes_read; + } + CloseHandle(h); + content.resize(total); + return content; +} + +auto write_file(std::string const& path, std::string_view data) -> Result +{ + auto par = std::string { pup::path::parent(path) }; + if (!par.empty()) { + auto r = create_directories(par); + if (!r) { + return r; + } + } + auto wpath = to_wide(path); + auto h = CreateFileW( + wpath.c_str(), + GENERIC_WRITE, + 0, + nullptr, + CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, + nullptr + ); + if (h == INVALID_HANDLE_VALUE) { + return make_error(ErrorCode::IoError, "Failed to open file for writing: " + path); + } + auto bytes_written = DWORD {}; + auto ok = WriteFile(h, data.data(), static_cast(data.size()), &bytes_written, nullptr); + CloseHandle(h); + if (!ok || bytes_written != data.size()) { + return make_error(ErrorCode::IoError, "Failed to write file: " + path); + } + return {}; +} + +// Directory traversal + +auto read_directory(std::string const& path) -> Result> +{ + auto wpath = to_wide(path) + L"\\*"; + auto fd = 
WIN32_FIND_DATAW {}; + auto h = FindFirstFileW(wpath.c_str(), &fd); + if (h == INVALID_HANDLE_VALUE) { + return make_error>(ErrorCode::IoError, "Failed to open directory: " + path); + } + auto entries = std::vector {}; + do { + if (wcscmp(fd.cFileName, L".") == 0 || wcscmp(fd.cFileName, L"..") == 0) { + continue; + } + auto name = from_wide(fd.cFileName); + auto is_dir = (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + entries.push_back(DirEntry { std::move(name), is_dir }); + } while (FindNextFileW(h, &fd)); + FindClose(h); + return entries; +} + +auto walk_directory(std::string const& path, WalkVisitor const& visitor) -> Result +{ + auto walk_impl = [&](auto& self, std::string const& base, std::string const& rel) -> Result { + auto full = rel.empty() ? base : base + "/" + rel; + auto entries = read_directory(full); + if (!entries) { + return pup::unexpected(entries.error()); + } + for (auto const& e : *entries) { + auto child_rel = rel.empty() ? e.name : rel + "/" + e.name; + auto should_recurse = visitor(e, child_rel); + if (e.is_dir && should_recurse) { + auto r = self(self, base, child_rel); + if (!r) { + return r; + } + } + } + return {}; + }; + return walk_impl(walk_impl, path, ""); +} + } // namespace pup::platform diff --git a/src/platform/path-posix.cpp b/src/platform/path-posix.cpp index ddc8227..b0c8dd0 100644 --- a/src/platform/path-posix.cpp +++ b/src/platform/path-posix.cpp @@ -5,9 +5,9 @@ namespace pup::platform { -auto to_utf8(std::filesystem::path const& path) -> std::string +auto to_utf8(std::string const& path) -> std::string { - return path.string(); + return path; } } // namespace pup::platform diff --git a/src/platform/path-win32.cpp b/src/platform/path-win32.cpp index 5843033..b0c8dd0 100644 --- a/src/platform/path-win32.cpp +++ b/src/platform/path-win32.cpp @@ -5,11 +5,9 @@ namespace pup::platform { -auto to_utf8(std::filesystem::path const& path) -> std::string +auto to_utf8(std::string const& path) -> std::string { - auto u8 = 
path.u8string(); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return std::string { reinterpret_cast(u8.data()), u8.size() }; + return path; } } // namespace pup::platform diff --git a/src/platform/process-win32.cpp b/src/platform/process-win32.cpp index 4491c0b..2b0c7c7 100644 --- a/src/platform/process-win32.cpp +++ b/src/platform/process-win32.cpp @@ -150,7 +150,11 @@ auto run_process_with_callback( // Convert working directory auto working_dir = std::wstring {}; if (!opts.working_dir.empty()) { - working_dir = opts.working_dir.wstring(); + auto len = MultiByteToWideChar(CP_UTF8, 0, opts.working_dir.c_str(), -1, nullptr, 0); + if (len > 0) { + working_dir.resize(len - 1); + MultiByteToWideChar(CP_UTF8, 0, opts.working_dir.c_str(), -1, working_dir.data(), len); + } } // Create process diff --git a/test/unit/Tupfile b/test/unit/Tupfile index 22d06de..647c04d 100644 --- a/test/unit/Tupfile +++ b/test/unit/Tupfile @@ -19,17 +19,24 @@ test-srcs-y += test_eval.cpp test-srcs-y += test_exec.cpp test-srcs-y += test_glob.cpp test-srcs-y += test_graph.cpp +test-srcs-y += test_arena.cpp test-srcs-y += test_hash.cpp +test-srcs-y += test_id_array.cpp +test-srcs-y += test_id_bitset.cpp test-srcs-y += test_ignore.cpp test-srcs-y += test_index.cpp test-srcs-y += test_layout.cpp test-srcs-y += test_lexer.cpp test-srcs-y += test_main.cpp +test-srcs-y += test_node_id_map.cpp test-srcs-y += test_parser.cpp +test-srcs-y += test_path.cpp test-srcs-y += test_path_utils.cpp test-srcs-y += test_platform_file_io.cpp test-srcs-y += test_platform_process.cpp test-srcs-y += test_rule_pattern.cpp +test-srcs-y += test_sorted_id_vec.cpp +test-srcs-y += test_string_pool.cpp test-srcs-y += test_string_utils.cpp test-srcs-y += test_target.cpp test-srcs-y += test_types.cpp diff --git a/test/unit/e2e_fixture.cpp b/test/unit/e2e_fixture.cpp index 08ba682..609faad 100644 --- a/test/unit/e2e_fixture.cpp +++ b/test/unit/e2e_fixture.cpp @@ -76,7 +76,7 @@ 
E2EFixture::E2EFixture(std::string_view name) } copy_fixture(m_fixture_dir, m_workdir); - m_runner.set_working_dir(m_workdir); + m_runner.set_working_dir(m_workdir.string()); } E2EFixture::~E2EFixture() @@ -296,7 +296,7 @@ auto E2EFixture::run_pup_in_dir( } auto opts = exec::RunOptions { - .working_dir = working_dir, + .working_dir = working_dir.string(), .inherit_env = true, }; @@ -332,7 +332,7 @@ auto run_shell_fixture(std::string_view name) -> ProcessResult auto runner = exec::CommandRunner {}; auto opts = exec::RunOptions { - .working_dir = workdir, + .working_dir = workdir.string(), .env = { "PUP=" + get_pup_binary().string() }, .inherit_env = true, }; diff --git a/test/unit/test_arena.cpp b/test/unit/test_arena.cpp new file mode 100644 index 0000000..79388c8 --- /dev/null +++ b/test/unit/test_arena.cpp @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/arena.hpp" + +#include + +TEST_CASE("Arena32 empty state", "[arena]") +{ + auto arena = pup::Arena32 {}; + + SECTION("size is zero") + { + REQUIRE(arena.size() == 0); + } + + SECTION("empty slice get returns nullptr") + { + auto const slice = pup::ArenaSlice { 0, 0 }; + REQUIRE(arena.get(slice) == nullptr); + } +} + +TEST_CASE("Arena32 append and get", "[arena]") +{ + auto arena = pup::Arena32 {}; + + SECTION("single append") + { + std::uint32_t const vals[] = { 10, 20, 30 }; + auto const slice = arena.append(vals, 3); + + REQUIRE(slice.offset == 0); + REQUIRE(slice.length == 3); + REQUIRE(arena.size() == 3); + + auto const* p = arena.get(slice); + REQUIRE(p != nullptr); + REQUIRE(p[0] == 10); + REQUIRE(p[1] == 20); + REQUIRE(p[2] == 30); + } + + SECTION("empty append") + { + auto const slice = arena.append(nullptr, 0); + REQUIRE(slice.length == 0); + REQUIRE(arena.size() == 0); + } +} + +TEST_CASE("Arena32 multiple appends are contiguous", "[arena]") +{ + auto arena = pup::Arena32 {}; + + std::uint32_t const a_vals[] = 
{ 1, 2 }; + auto const a = arena.append(a_vals, 2); + + std::uint32_t const b_vals[] = { 3, 4, 5 }; + auto const b = arena.append(b_vals, 3); + + REQUIRE(a.offset == 0); + REQUIRE(a.length == 2); + REQUIRE(b.offset == 2); + REQUIRE(b.length == 3); + REQUIRE(arena.size() == 5); + + auto const* pa = arena.get(a); + auto const* pb = arena.get(b); + REQUIRE(pa[0] == 1); + REQUIRE(pa[1] == 2); + REQUIRE(pb[0] == 3); + REQUIRE(pb[1] == 4); + REQUIRE(pb[2] == 5); +} + +TEST_CASE("Arena32 compact shrinks allocation", "[arena]") +{ + auto arena = pup::Arena32 {}; + arena.reserve(1000); + + std::uint32_t const vals[] = { 42 }; + arena.append(vals, 1); + + REQUIRE(arena.size() == 1); + + arena.compact(); + REQUIRE(arena.size() == 1); + + auto const slice = pup::ArenaSlice { 0, 1 }; + REQUIRE(arena.get(slice)[0] == 42); +} + +TEST_CASE("Arena32 reserve pre-allocates", "[arena]") +{ + auto arena = pup::Arena32 {}; + arena.reserve(100); + + std::uint32_t const vals[] = { 7, 8, 9 }; + auto const slice = arena.append(vals, 3); + REQUIRE(arena.get(slice)[0] == 7); + REQUIRE(arena.size() == 3); +} + +TEST_CASE("Arena32 clear resets size", "[arena]") +{ + auto arena = pup::Arena32 {}; + std::uint32_t const vals[] = { 1, 2, 3 }; + arena.append(vals, 3); + + arena.clear(); + REQUIRE(arena.size() == 0); +} + +TEST_CASE("Arena32 move semantics", "[arena]") +{ + auto a = pup::Arena32 {}; + std::uint32_t const vals[] = { 10, 20 }; + auto const slice = a.append(vals, 2); + + SECTION("move constructor") + { + auto b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(b.get(slice)[0] == 10); + REQUIRE(b.get(slice)[1] == 20); + } + + SECTION("move assignment") + { + auto b = pup::Arena32 {}; + std::uint32_t const other_vals[] = { 99 }; + b.append(other_vals, 1); + b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(b.get(slice)[0] == 10); + } +} + +TEST_CASE("Arena32 slice() returns iterable span", "[arena]") +{ + auto arena = pup::Arena32 {}; + std::uint32_t vals[] = { 5, 10, 15 }; + auto s 
= arena.append(vals, 3); + + SECTION("span has correct data") + { + auto span = arena.slice(s); + REQUIRE(span.size() == 3); + REQUIRE(span[0] == 5); + REQUIRE(span[1] == 10); + REQUIRE(span[2] == 15); + } + + SECTION("span is iterable with range-for") + { + auto span = arena.slice(s); + auto sum = std::uint32_t { 0 }; + for (auto v : span) { + sum += v; + } + REQUIRE(sum == 30); + } + + SECTION("empty slice returns empty span") + { + auto empty = pup::ArenaSlice { 0, 0 }; + auto span = arena.slice(empty); + REQUIRE(span.empty()); + REQUIRE(span.size() == 0); + } +} + +TEST_CASE("Arena32 at() provides mutable access", "[arena]") +{ + auto arena = pup::Arena32 {}; + std::uint32_t vals[] = { 1, 2, 3 }; + auto s = arena.append(vals, 3); + + arena.at(s.offset + 1) = 42; + REQUIRE(arena.get(s)[1] == 42); +} + +TEST_CASE("Arena32 append_extend copies old slice and appends value", "[arena]") +{ + auto arena = pup::Arena32 {}; + + SECTION("extend from empty") + { + auto s = pup::ArenaSlice { 0, 0 }; + auto s2 = arena.append_extend(s, 42); + REQUIRE(s2.length == 1); + REQUIRE(arena.get(s2)[0] == 42); + } + + SECTION("extend existing slice") + { + std::uint32_t vals[] = { 1, 2, 3 }; + auto s = arena.append(vals, 3); + auto s2 = arena.append_extend(s, 4); + REQUIRE(s2.length == 4); + auto span = arena.slice(s2); + REQUIRE(span[0] == 1); + REQUIRE(span[1] == 2); + REQUIRE(span[2] == 3); + REQUIRE(span[3] == 4); + } + + SECTION("multiple extends") + { + auto s = pup::ArenaSlice { 0, 0 }; + for (std::uint32_t i = 0; i < 100; ++i) { + s = arena.append_extend(s, i); + } + REQUIRE(s.length == 100); + auto span = arena.slice(s); + for (std::uint32_t i = 0; i < 100; ++i) { + REQUIRE(span[i] == i); + } + } +} + +TEST_CASE("Arena32 append with nullptr zero-initializes", "[arena]") +{ + auto arena = pup::Arena32 {}; + auto s = arena.append(nullptr, 3); + + REQUIRE(s.length == 3); + REQUIRE(arena.get(s)[0] == 0); + REQUIRE(arena.get(s)[1] == 0); + REQUIRE(arena.get(s)[2] == 0); +} diff 
--git a/test/unit/test_builder.cpp b/test/unit/test_builder.cpp index 078044e..feaf26d 100644 --- a/test/unit/test_builder.cpp +++ b/test/unit/test_builder.cpp @@ -90,6 +90,7 @@ class BuilderTestFixture { } auto root() const -> fs::path const& { return test_root_; } + auto root_str() const -> std::string { return test_root_.string(); } auto create_file(fs::path const& rel_path) -> void { @@ -119,7 +120,7 @@ class BuilderTestFixture { // These test the exact behavior that the refactoring regression broke // ============================================================================= -TEST_CASE("GraphBuilder order-only group - case 1: empty pattern.path", "[builder][group][critical]") +TEST_CASE("GraphBuilder order-only group - case 1: empty pattern.path", "[e2e][builder][group][critical]") { // Case 1: is_order_only_group with empty pattern.path // Should use current_dir directly, NOT normalize_group_dir("") @@ -131,8 +132,8 @@ TEST_CASE("GraphBuilder order-only group - case 1: empty pattern.path", "[builde auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -177,7 +178,7 @@ TEST_CASE("GraphBuilder order-only group - case 1: empty pattern.path", "[builde REQUIRE(r2.has_value()); } -TEST_CASE("GraphBuilder order-only group - case 2: non-empty pattern.path with variable", "[builder][group][critical]") +TEST_CASE("GraphBuilder order-only group - case 2: non-empty pattern.path with variable", "[e2e][builder][group][critical]") { // Case 2: is_order_only_group with non-empty pattern.path // Should use normalize_group_dir(expanded_path, current_dir, source_root) @@ -189,8 +190,8 @@ TEST_CASE("GraphBuilder order-only group - case 2: non-empty pattern.path with v auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root 
= fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -236,7 +237,7 @@ TEST_CASE("GraphBuilder order-only group - case 2: non-empty pattern.path with v REQUIRE(r2.has_value()); } -TEST_CASE("GraphBuilder order-only group - case 3: path/ pattern", "[builder][group][critical]") +TEST_CASE("GraphBuilder order-only group - case 3: path/ pattern", "[e2e][builder][group][critical]") { // Case 3: After expand_path, path contains literal suffix // Should ALWAYS use normalize_group_dir(dir_part, current_dir, source_root) @@ -248,8 +249,8 @@ TEST_CASE("GraphBuilder order-only group - case 3: path/ pattern", "[buil auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -300,7 +301,7 @@ TEST_CASE("GraphBuilder order-only group - case 3: path/ pattern", "[buil // Bin {group} tests // ============================================================================= -TEST_CASE("GraphBuilder bin group reference {name}", "[builder][group]") +TEST_CASE("GraphBuilder bin group reference {name}", "[e2e][builder][group]") { auto fixture = BuilderTestFixture {}; auto graph = BuildGraph {}; @@ -308,8 +309,8 @@ TEST_CASE("GraphBuilder bin group reference {name}", "[builder][group]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -385,7 +386,7 @@ TEST_CASE("GraphBuilder bin group reference {name}", "[builder][group]") // Glob expansion tests // 
============================================================================= -TEST_CASE("GraphBuilder glob expansion - filesystem", "[builder][glob]") +TEST_CASE("GraphBuilder glob expansion - filesystem", "[e2e][builder][glob]") { auto fixture = BuilderTestFixture {}; fixture.create_file("src/foo.c"); @@ -397,8 +398,8 @@ TEST_CASE("GraphBuilder glob expansion - filesystem", "[builder][glob]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = true, @@ -427,7 +428,7 @@ TEST_CASE("GraphBuilder glob expansion - filesystem", "[builder][glob]") REQUIRE(commands.size() == 2); } -TEST_CASE("GraphBuilder glob expansion - generated files", "[builder][glob]") +TEST_CASE("GraphBuilder glob expansion - generated files", "[e2e][builder][glob]") { auto fixture = BuilderTestFixture {}; @@ -436,9 +437,9 @@ TEST_CASE("GraphBuilder glob expansion - generated files", "[builder][glob]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build-variant", + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build-variant").string(), .config_path = {}, .expand_globs = true, .validate_inputs = false, @@ -476,7 +477,7 @@ TEST_CASE("GraphBuilder glob expansion - generated files", "[builder][glob]") // tup.config special case // ============================================================================= -TEST_CASE("GraphBuilder tup.config in variant directory", "[builder][config]") +TEST_CASE("GraphBuilder tup.config in variant directory", "[e2e][builder][config]") { auto fixture = BuilderTestFixture {}; fixture.create_file("build-variant/tup.config"); @@ -487,9 +488,9 @@ TEST_CASE("GraphBuilder 
tup.config in variant directory", "[builder][config]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build-variant", + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build-variant").string(), .config_path = {}, .expand_globs = false, .validate_inputs = false, @@ -517,7 +518,7 @@ TEST_CASE("GraphBuilder tup.config in variant directory", "[builder][config]") // Exclusion pattern tests // ============================================================================= -TEST_CASE("GraphBuilder exclusion patterns - explicit file", "[builder][exclusion]") +TEST_CASE("GraphBuilder exclusion patterns - explicit file", "[e2e][builder][exclusion]") { auto fixture = BuilderTestFixture {}; fixture.create_file("src/foo.c"); @@ -529,8 +530,8 @@ TEST_CASE("GraphBuilder exclusion patterns - explicit file", "[builder][exclusio auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = true, @@ -581,7 +582,7 @@ TEST_CASE("GraphBuilder exclusion patterns - explicit file", "[builder][exclusio CHECK_FALSE(has_baz); } -TEST_CASE("GraphBuilder exclusion patterns - glob pattern", "[builder][exclusion]") +TEST_CASE("GraphBuilder exclusion patterns - glob pattern", "[e2e][builder][exclusion]") { auto fixture = BuilderTestFixture {}; fixture.create_file("src/main.c"); @@ -594,8 +595,8 @@ TEST_CASE("GraphBuilder exclusion patterns - glob pattern", "[builder][exclusion auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, 
.config_path = {}, .expand_globs = true, @@ -646,7 +647,7 @@ TEST_CASE("GraphBuilder exclusion patterns - glob pattern", "[builder][exclusion CHECK_FALSE(has_test); } -TEST_CASE("GraphBuilder caret exclusion patterns for foreach", "[builder][exclusion][foreach]") +TEST_CASE("GraphBuilder caret exclusion patterns for foreach", "[e2e][builder][exclusion][foreach]") { // Tests that ^ prefix works as exclusion in input patterns (tup uses ^ for foreach exclusions) auto fixture = BuilderTestFixture {}; @@ -659,8 +660,8 @@ TEST_CASE("GraphBuilder caret exclusion patterns for foreach", "[builder][exclus auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = true, @@ -719,7 +720,7 @@ TEST_CASE("GraphBuilder caret exclusion patterns for foreach", "[builder][exclus // Cross-directory group reference tests // ============================================================================= -TEST_CASE("GraphBuilder cross-directory order-only group with relative path", "[builder][group][cross-dir]") +TEST_CASE("GraphBuilder cross-directory order-only group with relative path", "[e2e][builder][group][cross-dir]") { auto fixture = BuilderTestFixture {}; @@ -728,8 +729,8 @@ TEST_CASE("GraphBuilder cross-directory order-only group with relative path", "[ auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -800,7 +801,7 @@ TEST_CASE("GraphBuilder cross-directory order-only group with relative path", "[ // normalize_group_dir edge cases // ============================================================================= -TEST_CASE("GraphBuilder 
normalize_group_dir empty string returns dot", "[builder][normalize]") +TEST_CASE("GraphBuilder normalize_group_dir empty string returns dot", "[e2e][builder][normalize]") { // This test documents the critical behavior: // normalize_group_dir("", current_dir, source_root) should return "." @@ -816,8 +817,8 @@ TEST_CASE("GraphBuilder normalize_group_dir empty string returns dot", "[builder auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -889,7 +890,7 @@ TEST_CASE("GraphBuilder normalize_group_dir empty string returns dot", "[builder // Variant build path resolution // ============================================================================= -TEST_CASE("GraphBuilder variant output mapping", "[builder][variant]") +TEST_CASE("GraphBuilder variant output mapping", "[e2e][builder][variant]") { auto fixture = BuilderTestFixture {}; fixture.create_file("src/main.c"); @@ -899,9 +900,9 @@ TEST_CASE("GraphBuilder variant output mapping", "[builder][variant]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build-variant", + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build-variant").string(), .config_path = {}, .expand_globs = false, .validate_inputs = false, @@ -937,7 +938,7 @@ TEST_CASE("GraphBuilder variant output mapping", "[builder][variant]") // Deep current_dir tests (regression prevention) // ============================================================================= -TEST_CASE("GraphBuilder deep directory with parent references", "[builder][deep-dir]") +TEST_CASE("GraphBuilder deep directory with parent references", "[e2e][builder][deep-dir]") { 
auto fixture = BuilderTestFixture {}; fixture.create_file("include/common.h"); @@ -949,8 +950,8 @@ TEST_CASE("GraphBuilder deep directory with parent references", "[builder][deep- auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -991,7 +992,7 @@ TEST_CASE("GraphBuilder deep directory with parent references", "[builder][deep- REQUIRE(found_header); } -TEST_CASE("GraphBuilder directory node creation", "[builder][dir-nodes]") +TEST_CASE("GraphBuilder directory node creation", "[e2e][builder][dir-nodes]") { auto fixture = BuilderTestFixture {}; fixture.create_file("src/util/helpers.c"); @@ -1001,8 +1002,8 @@ TEST_CASE("GraphBuilder directory node creation", "[builder][dir-nodes]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -1057,7 +1058,7 @@ TEST_CASE("GraphBuilder directory node creation", "[builder][dir-nodes]") // Out-of-tree build tests (-B flag) // ============================================================================= -TEST_CASE("GraphBuilder out-of-tree build outputs use relative paths", "[builder][variant]") +TEST_CASE("GraphBuilder out-of-tree build outputs use relative paths", "[e2e][builder][variant]") { // When using -B for out-of-tree builds, output paths in the graph are stored // source-root-relative (e.g., "src/main.o"). Variant mapping is applied at I/O time. 
@@ -1069,12 +1070,12 @@ TEST_CASE("GraphBuilder out-of-tree build outputs use relative paths", "[builder auto ctx = EvalContext { .vars = &vars }; // Simulate -B build-variant - auto output_root = fixture.root() / "build-variant"; - fs::create_directories(output_root / "src"); + auto output_root = (fixture.root() / "build-variant").string(); + fs::create_directories(fs::path { output_root } / "src"); auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = output_root, .config_path = {}, .expand_globs = false, @@ -1121,7 +1122,7 @@ TEST_CASE("GraphBuilder out-of-tree build outputs use relative paths", "[builder CHECK(full_path[0] != '/'); } -TEST_CASE("GraphBuilder out-of-tree cross-directory generated file reference", "[builder][variant][critical]") +TEST_CASE("GraphBuilder out-of-tree cross-directory generated file reference", "[e2e][builder][variant][critical]") { // Test case: Tupfile in output/hex references generated file from boot/ // With source-root-relative storage, output/hex/Tupfile references @@ -1138,10 +1139,10 @@ TEST_CASE("GraphBuilder out-of-tree cross-directory generated file reference", " fs::create_directories(fixture.root() / "build-variant" / "boot"); fs::create_directories(fixture.root() / "build-variant" / "output" / "hex"); - auto output_root = fixture.root() / "build-variant"; + auto output_root = (fixture.root() / "build-variant").string(); auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = output_root, .config_path = {}, .expand_globs = false, @@ -1225,7 +1226,7 @@ TEST_CASE("GraphBuilder out-of-tree cross-directory generated file reference", " CHECK(found_boot_hex); } -TEST_CASE("GraphBuilder TUP_VARIANT_OUTPUTDIR matches tup behavior", "[builder][variant][critical]") 
+TEST_CASE("GraphBuilder TUP_VARIANT_OUTPUTDIR matches tup behavior", "[e2e][builder][variant][critical]") { // Based on tup test t8108-variant-outputdir.sh // Outputs are stored at source-relative paths (same as inputs). @@ -1242,9 +1243,9 @@ TEST_CASE("GraphBuilder TUP_VARIANT_OUTPUTDIR matches tup behavior", "[builder][ // In-tree variant build auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build", + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build").string(), .config_path = {}, .expand_globs = false, .validate_inputs = false, @@ -1294,7 +1295,7 @@ TEST_CASE("GraphBuilder TUP_VARIANT_OUTPUTDIR matches tup behavior", "[builder][ // Path simplification tests // ============================================================================= -TEST_CASE("GraphBuilder path simplification at root", "[builder][paths]") +TEST_CASE("GraphBuilder path simplification at root", "[e2e][builder][paths]") { // Test that paths at project root stay as-is (no transformation needed) @@ -1307,8 +1308,8 @@ TEST_CASE("GraphBuilder path simplification at root", "[builder][paths]") auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -1344,7 +1345,7 @@ TEST_CASE("GraphBuilder path simplification at root", "[builder][paths]") CHECK(cmd_str.find("-o main.o") != std::string_view::npos); } -TEST_CASE("GraphBuilder path simplification in subdirectory commands", "[builder][paths]") +TEST_CASE("GraphBuilder path simplification in subdirectory commands", "[e2e][builder][paths]") { // Test that local files in subdirectory builds use simplified paths // e.g., "add.c" not "../../src/lib/add.c" when Tupfile is in src/lib/ @@ 
-1358,8 +1359,8 @@ TEST_CASE("GraphBuilder path simplification in subdirectory commands", "[builder auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -1396,7 +1397,7 @@ TEST_CASE("GraphBuilder path simplification in subdirectory commands", "[builder CHECK(cmd_str.find("-o add.o") != std::string_view::npos); } -TEST_CASE("GraphBuilder path simplification - cross-directory reference", "[builder][paths]") +TEST_CASE("GraphBuilder path simplification - cross-directory reference", "[e2e][builder][paths]") { // Test that cross-directory references resolve correctly // When Tupfile is in src/lib/ and references ../util/helper.c @@ -1413,8 +1414,8 @@ TEST_CASE("GraphBuilder path simplification - cross-directory reference", "[buil auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), .output_root = {}, .config_path = {}, .expand_globs = false, @@ -1453,7 +1454,7 @@ TEST_CASE("GraphBuilder path simplification - cross-directory reference", "[buil CHECK(cmd_str.find("src/util/helper.c") != std::string_view::npos); } -TEST_CASE("GraphBuilder path simplification in variant build", "[builder][paths][variant]") +TEST_CASE("GraphBuilder path simplification in variant build", "[e2e][builder][paths][variant]") { // Test that in variant builds: // - Input paths (source files) are still simplified @@ -1470,9 +1471,9 @@ TEST_CASE("GraphBuilder path simplification in variant build", "[builder][paths] // Variant build with output_root auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build", + .source_root = 
fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build").string(), .config_path = {}, .expand_globs = false, .validate_inputs = false, @@ -1519,7 +1520,7 @@ TEST_CASE("GraphBuilder path simplification in variant build", "[builder][paths] // Dotdot filename edge cases (Issue #1: fragile ".." detection) // ============================================================================= -TEST_CASE("GraphBuilder output filename starting with dotdot is not parent reference", "[builder][paths][dotdot]") +TEST_CASE("GraphBuilder output filename starting with dotdot is not parent reference", "[e2e][builder][paths][dotdot]") { // Regression test: filenames like "..hidden" must be treated as literal filenames, // not as parent directory references. Node traversal handles this correctly because @@ -1536,9 +1537,9 @@ TEST_CASE("GraphBuilder output filename starting with dotdot is not parent refer auto ctx = EvalContext { .vars = &vars }; auto options = BuilderOptions { - .source_root = fixture.root(), - .config_root = fixture.root(), - .output_root = fixture.root() / "build", + .source_root = fixture.root_str(), + .config_root = fixture.root_str(), + .output_root = (fixture.root() / "build").string(), .config_path = {}, .expand_globs = false, .validate_inputs = false, diff --git a/test/unit/test_exec.cpp b/test/unit/test_exec.cpp index 0d158bb..c65c74e 100644 --- a/test/unit/test_exec.cpp +++ b/test/unit/test_exec.cpp @@ -443,8 +443,8 @@ TEST_CASE("Scheduler exported_vars", "[exec]") auto cmd_node = graph::CommandNode { .instruction_id = graph.intern("echo $PUP_TEST_EXPORT_VAR"), }; - cmd_node.exported_vars.insert(graph.intern("PUP_TEST_EXPORT_VAR")); - auto cmd_id = graph.add_command_node(cmd_node); + cmd_node.exported_vars.insert(to_underlying(graph.intern("PUP_TEST_EXPORT_VAR"))); + auto cmd_id = graph.add_command_node(std::move(cmd_node)); auto output_id = graph.add_file_node(graph::FileNode { .type = NodeType::Generated, @@ -486,7 
+486,7 @@ TEST_CASE("Scheduler exported_vars", "[exec]") .instruction_id = graph.intern("echo ${PUP_TEST_HIDDEN_VAR:-default}"), }; // Note: exported_vars is empty - auto cmd_id = graph.add_command_node(cmd_node); + auto cmd_id = graph.add_command_node(std::move(cmd_node)); auto output_id = graph.add_file_node(graph::FileNode { .type = NodeType::Generated, diff --git a/test/unit/test_graph.cpp b/test/unit/test_graph.cpp index 165aecb..3f82cc3 100644 --- a/test/unit/test_graph.cpp +++ b/test/unit/test_graph.cpp @@ -965,11 +965,12 @@ TEST_CASE("collect_command_dependencies follows order-only deps through groups", (void)graph.add_order_only_edge(*group1, *c2); (void)graph.add_edge(*c2, *file2, LinkType::Normal); - auto commands = std::set { *c2 }; + auto commands = pup::NodeIdMap32 {}; + commands.set(*c2, 1); auto deps = pup::cli::collect_command_dependencies(graph, commands); - REQUIRE(deps.count(*c2) == 1); - REQUIRE(deps.count(*c1) == 1); + REQUIRE(deps.contains(*c2)); + REQUIRE(deps.contains(*c1)); } TEST_CASE("Topological sort respects order-only deps through groups", "[topo][groups][order-only]") diff --git a/test/unit/test_id_array.cpp b/test/unit/test_id_array.cpp new file mode 100644 index 0000000..6f88fb1 --- /dev/null +++ b/test/unit/test_id_array.cpp @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/id_array.hpp" + +#include + +TEST_CASE("IdArray32 empty state", "[id_array]") +{ + auto arr = pup::IdArray32 {}; + + SECTION("get returns 0 for any id") + { + REQUIRE(arr.get(0) == 0); + REQUIRE(arr.get(42) == 0); + } + + SECTION("contains returns false") + { + REQUIRE_FALSE(arr.contains(0)); + REQUIRE_FALSE(arr.contains(100)); + } + + SECTION("for_each visits nothing") + { + auto visited = std::size_t { 0 }; + arr.for_each( + [](std::uint32_t, std::uint32_t, void* ctx) { + ++*static_cast(ctx); + }, + &visited + ); + REQUIRE(visited == 0); + } +} + 
+TEST_CASE("IdArray32 set and get", "[id_array]") +{ + auto arr = pup::IdArray32 {}; + + SECTION("single value") + { + arr.set(10, 42); + REQUIRE(arr.get(10) == 42); + REQUIRE(arr.contains(10)); + } + + SECTION("overwrite existing") + { + arr.set(5, 100); + arr.set(5, 200); + REQUIRE(arr.get(5) == 200); + REQUIRE(arr.contains(5)); + } + + SECTION("unset returns 0") + { + arr.set(10, 42); + REQUIRE(arr.get(11) == 0); + REQUIRE_FALSE(arr.contains(11)); + } + + SECTION("set with value 0 is still present") + { + arr.set(7, 0); + REQUIRE(arr.contains(7)); + REQUIRE(arr.get(7) == 0); + } +} + +TEST_CASE("IdArray32 clear", "[id_array]") +{ + auto arr = pup::IdArray32 {}; + arr.set(1, 10); + arr.set(2, 20); + arr.set(3, 30); + + arr.clear(); + REQUIRE_FALSE(arr.contains(1)); + REQUIRE_FALSE(arr.contains(2)); + REQUIRE_FALSE(arr.contains(3)); + REQUIRE(arr.get(1) == 0); +} + +TEST_CASE("IdArray32 for_each iterates occupied slots", "[id_array]") +{ + auto arr = pup::IdArray32 {}; + arr.set(100, 1); + arr.set(5, 2); + arr.set(50, 3); + + auto collected = std::vector> {}; + arr.for_each( + [](std::uint32_t id, std::uint32_t value, void* ctx) { + static_cast>*>(ctx)->emplace_back(id, value); + }, + &collected + ); + + REQUIRE(collected.size() == 3); + REQUIRE(collected[0] == std::pair { 5, 2 }); + REQUIRE(collected[1] == std::pair { 50, 3 }); + REQUIRE(collected[2] == std::pair { 100, 1 }); +} + +TEST_CASE("IdArray32 move semantics", "[id_array]") +{ + auto a = pup::IdArray32 {}; + a.set(1, 10); + a.set(2, 20); + + SECTION("move constructor") + { + auto b = std::move(a); + REQUIRE(b.get(1) == 10); + REQUIRE(b.get(2) == 20); + REQUIRE(b.contains(1)); + } + + SECTION("move assignment") + { + auto b = pup::IdArray32 {}; + b.set(99, 99); + b = std::move(a); + REQUIRE(b.get(1) == 10); + REQUIRE(b.contains(1)); + REQUIRE_FALSE(b.contains(99)); + } +} + diff --git a/test/unit/test_id_bitset.cpp b/test/unit/test_id_bitset.cpp new file mode 100644 index 0000000..4c09578 --- /dev/null 
+++ b/test/unit/test_id_bitset.cpp @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/id_bitset.hpp" + +#include + +TEST_CASE("IdBitSet empty state", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + + SECTION("count is zero") + { + REQUIRE(bs.count() == 0); + } + + SECTION("contains returns false for any id") + { + REQUIRE_FALSE(bs.contains(0)); + REQUIRE_FALSE(bs.contains(42)); + REQUIRE_FALSE(bs.contains(1000)); + } + + SECTION("remove on empty is safe") + { + bs.remove(0); + bs.remove(999); + REQUIRE(bs.count() == 0); + } + + SECTION("for_each visits nothing") + { + auto visited = std::size_t { 0 }; + bs.for_each( + [](std::uint32_t, void* ctx) { + ++*static_cast(ctx); + }, + &visited + ); + REQUIRE(visited == 0); + } +} + +TEST_CASE("IdBitSet insert and contains", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + + SECTION("single insert") + { + bs.insert(10); + REQUIRE(bs.contains(10)); + REQUIRE_FALSE(bs.contains(11)); + REQUIRE(bs.count() == 1); + } + + SECTION("id 0") + { + bs.insert(0); + REQUIRE(bs.contains(0)); + REQUIRE(bs.count() == 1); + } + + SECTION("duplicate insert is idempotent") + { + bs.insert(5); + bs.insert(5); + bs.insert(5); + REQUIRE(bs.count() == 1); + REQUIRE(bs.contains(5)); + } + + SECTION("multiple ids") + { + bs.insert(1); + bs.insert(100); + bs.insert(200); + REQUIRE(bs.count() == 3); + REQUIRE(bs.contains(1)); + REQUIRE(bs.contains(100)); + REQUIRE(bs.contains(200)); + REQUIRE_FALSE(bs.contains(2)); + } +} + +TEST_CASE("IdBitSet word boundary", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + + SECTION("id 63 and 64 span word boundary") + { + bs.insert(63); + bs.insert(64); + REQUIRE(bs.contains(63)); + REQUIRE(bs.contains(64)); + REQUIRE_FALSE(bs.contains(62)); + REQUIRE_FALSE(bs.contains(65)); + REQUIRE(bs.count() == 2); + } + + SECTION("all bits in first word") + { + for (auto i = std::uint32_t { 0 }; i < 64; ++i) { + bs.insert(i); + } 
+ REQUIRE(bs.count() == 64); + for (auto i = std::uint32_t { 0 }; i < 64; ++i) { + REQUIRE(bs.contains(i)); + } + REQUIRE_FALSE(bs.contains(64)); + } +} + +TEST_CASE("IdBitSet remove", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + bs.insert(10); + bs.insert(20); + bs.insert(30); + + SECTION("remove existing id") + { + bs.remove(20); + REQUIRE_FALSE(bs.contains(20)); + REQUIRE(bs.contains(10)); + REQUIRE(bs.contains(30)); + REQUIRE(bs.count() == 2); + } + + SECTION("remove non-existing id is safe") + { + bs.remove(99); + REQUIRE(bs.count() == 3); + } +} + +TEST_CASE("IdBitSet clear", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + bs.insert(1); + bs.insert(2); + bs.insert(3); + + bs.clear(); + REQUIRE(bs.count() == 0); + REQUIRE_FALSE(bs.contains(1)); + REQUIRE_FALSE(bs.contains(2)); + REQUIRE_FALSE(bs.contains(3)); +} + +TEST_CASE("IdBitSet for_each iteration order", "[id_bitset]") +{ + auto bs = pup::IdBitSet {}; + bs.insert(200); + bs.insert(5); + bs.insert(63); + bs.insert(64); + bs.insert(0); + + auto collected = std::vector {}; + bs.for_each( + [](std::uint32_t id, void* ctx) { + static_cast*>(ctx)->push_back(id); + }, + &collected + ); + + REQUIRE(collected == std::vector { 0, 5, 63, 64, 200 }); +} + +TEST_CASE("IdBitSet move semantics", "[id_bitset]") +{ + auto a = pup::IdBitSet {}; + a.insert(42); + a.insert(100); + + SECTION("move constructor") + { + auto b = std::move(a); + REQUIRE(b.contains(42)); + REQUIRE(b.contains(100)); + REQUIRE(b.count() == 2); + } + + SECTION("move assignment") + { + auto b = pup::IdBitSet {}; + b.insert(7); + b = std::move(a); + REQUIRE(b.contains(42)); + REQUIRE(b.contains(100)); + REQUIRE_FALSE(b.contains(7)); + } +} diff --git a/test/unit/test_ignore.cpp b/test/unit/test_ignore.cpp index 1a65d24..e3bdcc9 100644 --- a/test/unit/test_ignore.cpp +++ b/test/unit/test_ignore.cpp @@ -17,19 +17,19 @@ TEST_CASE("IgnoreList default patterns", "[ignore]") SECTION("ignores .git directory") { REQUIRE(ignore.is_ignored(".git")); - 
REQUIRE(ignore.is_ignored(fs::path { ".git" })); + } SECTION("ignores .pup directory") { REQUIRE(ignore.is_ignored(".pup")); - REQUIRE(ignore.is_ignored(fs::path { ".pup" })); + } SECTION("ignores node_modules directory") { REQUIRE(ignore.is_ignored("node_modules")); - REQUIRE(ignore.is_ignored(fs::path { "node_modules" })); + } SECTION("does not ignore regular directories") @@ -140,7 +140,7 @@ TEST_CASE("IgnoreList path matching", "[ignore]") { ignore.add("vendor/"); REQUIRE(ignore.is_ignored("vendor")); - REQUIRE(ignore.is_ignored(fs::path { "vendor" })); + } SECTION("nested paths") @@ -178,7 +178,7 @@ TEST_CASE("IgnoreList file loading", "[ignore]") out << "!keep.tmp\n"; } - auto result = IgnoreList::load(ignore_file); + auto result = IgnoreList::load(ignore_file.string()); REQUIRE(result); auto& ignore = *result; @@ -191,7 +191,7 @@ TEST_CASE("IgnoreList file loading", "[ignore]") SECTION("handles missing file") { - auto result = IgnoreList::load(test_dir / "nonexistent"); + auto result = IgnoreList::load((test_dir / "nonexistent").string()); REQUIRE_FALSE(result); } diff --git a/test/unit/test_index.cpp b/test/unit/test_index.cpp index 76ef035..5fc6156 100644 --- a/test/unit/test_index.cpp +++ b/test/unit/test_index.cpp @@ -10,10 +10,17 @@ #include #include #include +#include using namespace pup; using namespace pup::index; +namespace pup::index { +[[nodiscard]] +auto build_command_lookup(Index const& index) + -> std::unordered_map; +} // namespace pup::index + namespace { auto find_file_by_path(Index const& index, std::string_view path) -> FileEntry const* @@ -282,7 +289,7 @@ TEST_CASE("Index in-memory operations", "[index]") } } -TEST_CASE("Index serialization roundtrip", "[index]") +TEST_CASE("Index serialization roundtrip", "[e2e][index]") { // IDs must be consecutive and match array position (id = array_index + 1) // Files: 1, 2, 3, 4, 5 in insertion order @@ -357,7 +364,7 @@ TEST_CASE("Index serialization roundtrip", "[index]") REQUIRE(data->size() > 
sizeof(RawHeader) + sizeof(RawFooter)); // Write to temp file and read back - auto temp_path = std::filesystem::temp_directory_path() / "pup_test_index"; + auto temp_path = (std::filesystem::temp_directory_path() / "pup_test_index").string(); auto write_result = write_index(temp_path, index); REQUIRE(write_result.has_value()); @@ -438,7 +445,7 @@ TEST_CASE("Index serialization roundtrip", "[index]") std::filesystem::remove(temp_path); } -TEST_CASE("Index ID contiguity requirement", "[index]") +TEST_CASE("Index ID contiguity requirement", "[e2e][index]") { // This test documents a design constraint: IDs must be contiguous when // stored in the index. The index format assigns IDs from array position @@ -492,7 +499,7 @@ TEST_CASE("Index ID contiguity requirement", "[index]") }); // Serialize and read back - auto temp_path = std::filesystem::temp_directory_path() / "pup_test_contiguous"; + auto temp_path = (std::filesystem::temp_directory_path() / "pup_test_contiguous").string(); auto write_result = write_index(temp_path, index); REQUIRE(write_result.has_value()); @@ -517,7 +524,7 @@ TEST_CASE("Index ID contiguity requirement", "[index]") std::filesystem::remove(temp_path); } -TEST_CASE("Index reader validation", "[index]") +TEST_CASE("Index reader validation", "[e2e][index]") { SECTION("non-existent file") { @@ -533,7 +540,7 @@ TEST_CASE("Index reader validation", "[index]") auto index = Index {}; index.add_file(FileEntry { .id = 1, .name = "test.c" }); - auto temp_path = std::filesystem::temp_directory_path() / "pup_valid_test"; + auto temp_path = (std::filesystem::temp_directory_path() / "pup_valid_test").string(); (void)write_index(temp_path, index); REQUIRE(is_valid_index(temp_path)); @@ -542,7 +549,7 @@ TEST_CASE("Index reader validation", "[index]") } } -TEST_CASE("Index reader malicious data handling", "[index]") +TEST_CASE("Index reader malicious data handling", "[e2e][index]") { // Create a minimal valid index to use as base auto const cmd_id = 
node_id::make_command(1); @@ -554,7 +561,7 @@ TEST_CASE("Index reader malicious data handling", "[index]") auto data = serialize_index(index); REQUIRE(data.has_value()); - auto temp_path = std::filesystem::temp_directory_path() / "pup_malicious_test"; + auto temp_path = (std::filesystem::temp_directory_path() / "pup_malicious_test").string(); SECTION("file_offset beyond file size") { @@ -1058,7 +1065,7 @@ TEST_CASE("v8 build_command_lookup", "[index][v8]") } } -TEST_CASE("v8 roundtrip with operand sections", "[index][v8]") +TEST_CASE("v8 roundtrip with operand sections", "[e2e][index][v8]") { auto index = Index {}; @@ -1103,7 +1110,7 @@ TEST_CASE("v8 roundtrip with operand sections", "[index][v8]") REQUIRE(data.has_value()); // Write and read back - auto temp_path = std::filesystem::temp_directory_path() / "pup_v8_roundtrip_test"; + auto temp_path = (std::filesystem::temp_directory_path() / "pup_v8_roundtrip_test").string(); auto write_result = write_index(temp_path, index); REQUIRE(write_result.has_value()); diff --git a/test/unit/test_layout.cpp b/test/unit/test_layout.cpp index ad4a19e..6158281 100644 --- a/test/unit/test_layout.cpp +++ b/test/unit/test_layout.cpp @@ -49,16 +49,16 @@ class TempDir { } // namespace -TEST_CASE("find_project_root", "[layout]") +TEST_CASE("find_project_root", "[e2e][layout]") { SECTION("finds Tupfile.ini in current directory") { auto tmp = TempDir {}; tmp.create_file("Tupfile.ini"); - auto result = pup::find_project_root(tmp.path()); + auto result = pup::find_project_root(tmp.path().string()); REQUIRE(result.has_value()); - REQUIRE(*result == tmp.path()); + REQUIRE(*result == tmp.path().string()); } SECTION("finds Tupfile.ini in parent directory") @@ -67,9 +67,9 @@ TEST_CASE("find_project_root", "[layout]") tmp.create_file("Tupfile.ini"); tmp.create_dir("src/lib"); - auto result = pup::find_project_root(tmp.path() / "src" / "lib"); + auto result = pup::find_project_root((tmp.path() / "src" / "lib").string()); 
REQUIRE(result.has_value()); - REQUIRE(*result == tmp.path()); + REQUIRE(*result == tmp.path().string()); } SECTION("build directory with .pup should find source root in parent") @@ -83,9 +83,9 @@ TEST_CASE("find_project_root", "[layout]") // When searching from build-release/, should find parent (with Tupfile.ini) // NOT stop at build-release/ just because it has .pup - auto result = pup::find_project_root(tmp.path() / "build-release"); + auto result = pup::find_project_root((tmp.path() / "build-release").string()); REQUIRE(result.has_value()); - REQUIRE(*result == tmp.path()); + REQUIRE(*result == tmp.path().string()); } SECTION("returns nullopt when no project root found") @@ -93,14 +93,14 @@ TEST_CASE("find_project_root", "[layout]") auto tmp = TempDir {}; tmp.create_dir("empty"); - auto result = pup::find_project_root(tmp.path() / "empty"); + auto result = pup::find_project_root((tmp.path() / "empty").string()); // Should walk up to tmp.path() but not find anything, then continue up // Eventually returns nullopt when reaching filesystem root // (This test may find a project root in parent dirs in dev environment) } } -TEST_CASE("discover_layout from build directory", "[layout]") +TEST_CASE("discover_layout from build directory", "[e2e][layout]") { SECTION("discovers source root when cwd is build directory") { @@ -119,7 +119,7 @@ TEST_CASE("discover_layout from build directory", "[layout]") fs::current_path(original_cwd); REQUIRE(result.has_value()); - REQUIRE(fs::canonical(result->source_root) == fs::canonical(tmp.path())); - REQUIRE(fs::canonical(result->output_root) == fs::canonical(tmp.path() / "build")); + REQUIRE(fs::canonical(result->source_root) == fs::canonical(tmp.path()).string()); + REQUIRE(fs::canonical(result->output_root) == fs::canonical(tmp.path() / "build").string()); } } diff --git a/test/unit/test_node_id_map.cpp b/test/unit/test_node_id_map.cpp new file mode 100644 index 0000000..319163b --- /dev/null +++ b/test/unit/test_node_id_map.cpp @@ 
-0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/arena.hpp" +#include "pup/core/node_id_map.hpp" + +using namespace pup; + +TEST_CASE("NodeIdMap32 dispatches by node type", "[node_id_map]") +{ + auto map = NodeIdMap32 {}; + map.resize_files(100); + map.resize_commands(50); + map.resize_conditions(10); + map.resize_phis(10); + + auto file_id = NodeId { 5 }; + auto cmd_id = node_id::make_command(3); + auto cond_id = node_id::make_condition(2); + auto phi_id = node_id::make_phi(1); + + SECTION("slots are independent per type") + { + map.set(file_id, 111); + map.set(cmd_id, 222); + map.set(cond_id, 333); + map.set(phi_id, 444); + REQUIRE(map.get(file_id) == 111); + REQUIRE(map.get(cmd_id) == 222); + REQUIRE(map.get(cond_id) == 333); + REQUIRE(map.get(phi_id) == 444); + } + + SECTION("contains checks correct sub-array") + { + map.set(file_id, 1); + REQUIRE(map.contains(file_id)); + REQUIRE_FALSE(map.contains(cmd_id)); + REQUIRE_FALSE(map.contains(cond_id)); + REQUIRE_FALSE(map.contains(phi_id)); + } + + SECTION("same index different types are independent") + { + auto f3 = NodeId { 3 }; + auto c3 = node_id::make_command(3); + map.set(f3, 10); + map.set(c3, 20); + REQUIRE(map.get(f3) == 10); + REQUIRE(map.get(c3) == 20); + } + + SECTION("clear resets all sub-arrays") + { + map.set(file_id, 1); + map.set(cmd_id, 2); + map.clear(); + REQUIRE_FALSE(map.contains(file_id)); + REQUIRE_FALSE(map.contains(cmd_id)); + } +} + +TEST_CASE("NodeIdArenaIndex stores and retrieves slices", "[node_id_map]") +{ + auto arena = Arena32 {}; + auto index = NodeIdArenaIndex {}; + index.resize_files(10); + index.resize_commands(10); + + auto file_id = NodeId { 3 }; + auto cmd_id = node_id::make_command(5); + + SECTION("absent node returns {0, 0}") + { + REQUIRE_FALSE(index.contains(file_id)); + auto s = index.get_slice(file_id); + REQUIRE(s.offset == 0); + REQUIRE(s.length == 0); + } + + 
SECTION("incremental append_extend builds slice") + { + auto s = index.get_slice(file_id); + s = arena.append_extend(s, 100); + index.set_slice(file_id, s); + s = arena.append_extend(s, 200); + index.set_slice(file_id, s); + s = arena.append_extend(s, 300); + index.set_slice(file_id, s); + + REQUIRE(index.contains(file_id)); + auto result = index.get_slice(file_id); + REQUIRE(result.length == 3); + + auto span = arena.slice(result); + REQUIRE(span[0] == 100); + REQUIRE(span[1] == 200); + REQUIRE(span[2] == 300); + } + + SECTION("different node types are independent") + { + auto sf = arena.append_extend(index.get_slice(file_id), 10); + index.set_slice(file_id, sf); + + auto sc = arena.append_extend(index.get_slice(cmd_id), 20); + index.set_slice(cmd_id, sc); + + REQUIRE(arena.slice(index.get_slice(file_id))[0] == 10); + REQUIRE(arena.slice(index.get_slice(cmd_id))[0] == 20); + } + + SECTION("clear resets all") + { + auto s = arena.append_extend(index.get_slice(file_id), 42); + index.set_slice(file_id, s); + index.clear(); + REQUIRE_FALSE(index.contains(file_id)); + } +} diff --git a/test/unit/test_path.cpp b/test/unit/test_path.cpp new file mode 100644 index 0000000..467f34a --- /dev/null +++ b/test/unit/test_path.cpp @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/path.hpp" + +using namespace pup::path; + +TEST_CASE("path::join", "[path]") +{ + SECTION("basic join") + { + REQUIRE(join("src", "foo.c") == "src/foo.c"); + } + + SECTION("trailing slash on lhs") + { + REQUIRE(join("src/", "foo.c") == "src/foo.c"); + } + + SECTION("empty lhs") + { + REQUIRE(join("", "foo.c") == "foo.c"); + } + + SECTION("empty rhs") + { + REQUIRE(join("src", "") == "src"); + } + + SECTION("absolute rhs replaces") + { + REQUIRE(join("src", "/usr/include") == "/usr/include"); + } + + SECTION("both empty") + { + REQUIRE(join("", "") == ""); + } +} + +TEST_CASE("path::parent", "[path]") +{ + 
SECTION("nested path") + { + REQUIRE(parent("src/lib/foo.c") == "src/lib"); + } + + SECTION("single component") + { + REQUIRE(parent("foo.c") == ""); + } + + SECTION("empty") + { + REQUIRE(parent("") == ""); + } + + SECTION("root") + { + REQUIRE(parent("/") == "/"); + } + + SECTION("root with file") + { + REQUIRE(parent("/foo") == "/"); + } + + SECTION("trailing slash") + { + REQUIRE(parent("src/lib/") == "src"); + } +} + +TEST_CASE("path::filename", "[path]") +{ + SECTION("nested") + { + REQUIRE(filename("src/foo.c") == "foo.c"); + } + + SECTION("just filename") + { + REQUIRE(filename("foo.c") == "foo.c"); + } + + SECTION("trailing slash") + { + REQUIRE(filename("src/") == ""); + } + + SECTION("empty") + { + REQUIRE(filename("") == ""); + } +} + +TEST_CASE("path::stem", "[path]") +{ + SECTION("basic") + { + REQUIRE(stem("foo.c") == "foo"); + } + + SECTION("double extension") + { + REQUIRE(stem("foo.tar.gz") == "foo.tar"); + } + + SECTION("no extension") + { + REQUIRE(stem("Makefile") == "Makefile"); + } + + SECTION("dotfile") + { + REQUIRE(stem(".gitignore") == ".gitignore"); + } + + SECTION("path prefix stripped") + { + REQUIRE(stem("src/foo.o") == "foo"); + } +} + +TEST_CASE("path::extension", "[path]") +{ + SECTION("basic") + { + REQUIRE(extension("foo.c") == ".c"); + } + + SECTION("double extension") + { + REQUIRE(extension("foo.tar.gz") == ".gz"); + } + + SECTION("no extension") + { + REQUIRE(extension("Makefile") == ""); + } + + SECTION("dotfile") + { + REQUIRE(extension(".gitignore") == ""); + } +} + +TEST_CASE("path::is_absolute", "[path]") +{ + REQUIRE(is_absolute("/usr/bin")); + REQUIRE(is_absolute("/")); + REQUIRE_FALSE(is_absolute("src/foo.c")); + REQUIRE_FALSE(is_absolute("")); + REQUIRE_FALSE(is_absolute("../foo")); +} + +TEST_CASE("path::normalize", "[path]") +{ + SECTION("dot segments") + { + REQUIRE(normalize("src/./foo.c") == "src/foo.c"); + } + + SECTION("dotdot segments") + { + REQUIRE(normalize("src/../include/foo.h") == "include/foo.h"); + } 
+ + SECTION("complex") + { + REQUIRE(normalize("a/b/c/../../d") == "a/d"); + } + + SECTION("absolute path") + { + REQUIRE(normalize("/a/b/../c") == "/a/c"); + } + + SECTION("absolute excess dotdot absorbed") + { + REQUIRE(normalize("/a/../..") == "/"); + } + + SECTION("absolute single dotdot") + { + REQUIRE(normalize("/..") == "/"); + } + + SECTION("relative excess dotdot preserved") + { + REQUIRE(normalize("a/../../b") == "../b"); + } + + SECTION("empty normalizes to dot") + { + REQUIRE(normalize("") == "."); + } + + SECTION("just dot") + { + REQUIRE(normalize(".") == "."); + } + + SECTION("double slashes") + { + REQUIRE(normalize("src//foo.c") == "src/foo.c"); + } + + SECTION("trailing slash") + { + REQUIRE(normalize("src/lib/") == "src/lib"); + } +} + +TEST_CASE("path::relative", "[path]") +{ + SECTION("child of base") + { + REQUIRE(relative("a/b/c", "a") == "b/c"); + } + + SECTION("same path") + { + REQUIRE(relative("a/b", "a/b") == "."); + } + + SECTION("sibling") + { + REQUIRE(relative("x/y", "a/b") == "../../x/y"); + } + + SECTION("base is child") + { + REQUIRE(relative("a", "a/b/c") == "../.."); + } + + SECTION("both empty") + { + REQUIRE(relative("", "") == "."); + } +} diff --git a/test/unit/test_path_utils.cpp b/test/unit/test_path_utils.cpp index 23449e0..7e0ba6f 100644 --- a/test/unit/test_path_utils.cpp +++ b/test/unit/test_path_utils.cpp @@ -13,37 +13,37 @@ TEST_CASE("is_path_under checks path containment", "[path_utils]") { SECTION("path directly under root") { - REQUIRE(pup::is_path_under(fs::path { "/root/file.c" }, fs::path { "/root" })); - REQUIRE(pup::is_path_under(fs::path { "/root/dir/file.c" }, fs::path { "/root" })); + REQUIRE(pup::is_path_under(std::string {"/root/file.c" }, std::string {"/root" })); + REQUIRE(pup::is_path_under(std::string {"/root/dir/file.c" }, std::string {"/root" })); } SECTION("path equals root") { - REQUIRE(pup::is_path_under(fs::path { "/root" }, fs::path { "/root" })); + REQUIRE(pup::is_path_under(std::string 
{"/root" }, std::string {"/root" })); } SECTION("path not under root") { - REQUIRE_FALSE(pup::is_path_under(fs::path { "/other/file.c" }, fs::path { "/root" })); + REQUIRE_FALSE(pup::is_path_under(std::string {"/other/file.c" }, std::string {"/root" })); } SECTION("handles trailing slashes on root") { - REQUIRE(pup::is_path_under(fs::path { "/root/file.c" }, fs::path { "/root/" })); + REQUIRE(pup::is_path_under(std::string {"/root/file.c" }, std::string {"/root/" })); } SECTION("handles relative paths") { - REQUIRE(pup::is_path_under(fs::path { "src/lib/file.c" }, fs::path { "src" })); - REQUIRE(pup::is_path_under(fs::path { "src/lib/file.c" }, fs::path { "src/lib" })); - REQUIRE_FALSE(pup::is_path_under(fs::path { "src/lib/file.c" }, fs::path { "other" })); + REQUIRE(pup::is_path_under(std::string {"src/lib/file.c" }, std::string {"src" })); + REQUIRE(pup::is_path_under(std::string {"src/lib/file.c" }, std::string {"src/lib" })); + REQUIRE_FALSE(pup::is_path_under(std::string {"src/lib/file.c" }, std::string {"other" })); } SECTION("handles directory boundary correctly") { // "src-new" should not be under "src" - REQUIRE_FALSE(pup::is_path_under(fs::path { "/root/src-new/file.c" }, fs::path { "/root/src" })); - REQUIRE(pup::is_path_under(fs::path { "/root/src/file.c" }, fs::path { "/root/src" })); + REQUIRE_FALSE(pup::is_path_under(std::string {"/root/src-new/file.c" }, std::string {"/root/src" })); + REQUIRE(pup::is_path_under(std::string {"/root/src/file.c" }, std::string {"/root/src" })); } } @@ -51,23 +51,23 @@ TEST_CASE("relative_to_root computes relative paths", "[path_utils]") { SECTION("path under root") { - REQUIRE(pup::relative_to_root(fs::path { "/root/src/file.c" }, fs::path { "/root" }) == "src/file.c"); - REQUIRE(pup::relative_to_root(fs::path { "/root/file.c" }, fs::path { "/root" }) == "file.c"); + REQUIRE(pup::relative_to_root(std::string {"/root/src/file.c" }, std::string {"/root" }) == "src/file.c"); + REQUIRE(pup::relative_to_root(std::string 
{"/root/file.c" }, std::string {"/root" }) == "file.c"); } SECTION("path equals root returns empty") { - REQUIRE(pup::relative_to_root(fs::path { "/root" }, fs::path { "/root" }).empty()); + REQUIRE(pup::relative_to_root(std::string {"/root" }, std::string {"/root" }).empty()); } SECTION("path not under root returns empty") { - REQUIRE(pup::relative_to_root(fs::path { "/other/file.c" }, fs::path { "/root" }).empty()); + REQUIRE(pup::relative_to_root(std::string {"/other/file.c" }, std::string {"/root" }).empty()); } SECTION("handles trailing slashes") { - REQUIRE(pup::relative_to_root(fs::path { "/root/src/file.c" }, fs::path { "/root/" }) == "src/file.c"); + REQUIRE(pup::relative_to_root(std::string {"/root/src/file.c" }, std::string {"/root/" }) == "src/file.c"); } } @@ -178,8 +178,8 @@ TEST_CASE("strip_path_prefix removes prefix from path", "[path_utils][path]") TEST_CASE("resolve_under_root resolves paths to target root", "[path_utils][path]") { - auto source_root = fs::path { "/home/user/src/project" }; - auto target_root = fs::path { "/home/user/src/project/../build" }; + auto source_root = std::string {"/home/user/src/project" }; + auto target_root = std::string {"/home/user/src/project/../build" }; SECTION("resolves path under target root") { @@ -214,8 +214,8 @@ TEST_CASE("resolve_under_root resolves paths to target root", "[path_utils][path // - target_root = /home/user/src/pup/build/busybox // After joining "applets/../../pup/build/busybox/include/autoconf.h" with // current_dir and lexically_normal, we get "../pup/build/busybox/include/autoconf.h" - auto src = fs::path { "/home/user/src/busybox" }; - auto tgt = fs::path { "/home/user/src/pup/build/busybox" }; + auto src = std::string {"/home/user/src/busybox" }; + auto tgt = std::string {"/home/user/src/pup/build/busybox" }; auto result = pup::resolve_under_root("../pup/build/busybox/include/autoconf.h", src, tgt); REQUIRE(result.has_value()); REQUIRE(*result == "include/autoconf.h"); diff --git 
a/test/unit/test_platform_file_io.cpp b/test/unit/test_platform_file_io.cpp index cf5a074..bbfe6d6 100644 --- a/test/unit/test_platform_file_io.cpp +++ b/test/unit/test_platform_file_io.cpp @@ -11,7 +11,7 @@ using namespace pup::platform; using namespace pup::test; -SCENARIO("MappedFile provides read-only access to file contents", "[platform][file_io]") +SCENARIO("MappedFile provides read-only access to file contents", "[e2e][platform][file_io]") { GIVEN("a file with known contents") { @@ -21,7 +21,7 @@ SCENARIO("MappedFile provides read-only access to file contents", "[platform][fi WHEN("the file is memory-mapped") { - auto result = MappedFile::open(f.workdir() / "test.bin"); + auto result = MappedFile::open((f.workdir() / "test.bin").string()); THEN("the operation succeeds") { @@ -50,7 +50,7 @@ SCENARIO("MappedFile handles missing files", "[platform][file_io]") { GIVEN("a path to a non-existent file") { - auto path = std::filesystem::path { "/tmp/pup_test_nonexistent_12345.bin" }; + auto path = std::string { "/tmp/pup_test_nonexistent_12345.bin" }; WHEN("attempting to memory-map the file") { @@ -65,14 +65,14 @@ SCENARIO("MappedFile handles missing files", "[platform][file_io]") } } -SCENARIO("MappedFile is move-only", "[platform][file_io]") +SCENARIO("MappedFile is move-only", "[e2e][platform][file_io]") { GIVEN("a memory-mapped file") { auto f = E2EFixture { "simple_c" }; f.write_file("test.bin", "test data"); - auto original = MappedFile::open(f.workdir() / "test.bin"); + auto original = MappedFile::open((f.workdir() / "test.bin").string()); REQUIRE(original.has_value()); auto original_data = original->data(); auto original_size = original->size(); @@ -96,7 +96,7 @@ SCENARIO("MappedFile is move-only", "[platform][file_io]") } } -TEST_CASE("stat_file returns file metadata", "[platform][file_io]") +TEST_CASE("stat_file returns file metadata", "[e2e][platform][file_io]") { auto f = E2EFixture { "simple_c" }; auto content = std::string { "test content with known 
size" }; @@ -104,20 +104,20 @@ TEST_CASE("stat_file returns file metadata", "[platform][file_io]") SECTION("returns correct size for existing file") { - auto result = stat_file(f.workdir() / "stat_test.txt"); + auto result = stat_file((f.workdir() / "stat_test.txt").string()); REQUIRE(result.has_value()); REQUIRE(result->size == content.size()); } SECTION("returns error for non-existent file") { - auto result = stat_file(f.workdir() / "nonexistent.txt"); + auto result = stat_file((f.workdir() / "nonexistent.txt").string()); REQUIRE_FALSE(result.has_value()); REQUIRE(result.error().code == pup::ErrorCode::IoError); } } -TEST_CASE("atomic_write creates file atomically", "[platform][file_io]") +TEST_CASE("atomic_write creates file atomically", "[e2e][platform][file_io]") { auto f = E2EFixture { "simple_c" }; auto content = std::string { "atomic write test content" }; @@ -128,7 +128,7 @@ TEST_CASE("atomic_write creates file atomically", "[platform][file_io]") SECTION("creates new file with correct contents") { - auto path = f.workdir() / "atomic_test.txt"; + auto path = (f.workdir() / "atomic_test.txt").string(); auto result = atomic_write(path, content_bytes); REQUIRE(result.has_value()); @@ -140,7 +140,7 @@ TEST_CASE("atomic_write creates file atomically", "[platform][file_io]") SECTION("overwrites existing file") { f.write_file("existing.txt", "old content"); - auto path = f.workdir() / "existing.txt"; + auto path = (f.workdir() / "existing.txt").string(); auto result = atomic_write(path, content_bytes); REQUIRE(result.has_value()); @@ -151,7 +151,7 @@ TEST_CASE("atomic_write creates file atomically", "[platform][file_io]") SECTION("creates parent directories if needed") { - auto path = f.workdir() / "subdir" / "nested" / "atomic_test.txt"; + auto path = (f.workdir() / "subdir" / "nested" / "atomic_test.txt").string(); auto result = atomic_write(path, content_bytes); REQUIRE(result.has_value()); diff --git a/test/unit/test_platform_process.cpp 
b/test/unit/test_platform_process.cpp index 5b34f10..fc4d0aa 100644 --- a/test/unit/test_platform_process.cpp +++ b/test/unit/test_platform_process.cpp @@ -91,7 +91,7 @@ SCENARIO("run_process respects working directory", "[platform][process]") f.mkdir("subdir"); auto opts = make_opts(PWD_CMD); - opts.working_dir = f.workdir() / "subdir"; + opts.working_dir = (f.workdir() / "subdir").string(); opts.capture_stdout = true; WHEN("the process is executed") diff --git a/test/unit/test_sorted_id_vec.cpp b/test/unit/test_sorted_id_vec.cpp new file mode 100644 index 0000000..015ea7d --- /dev/null +++ b/test/unit/test_sorted_id_vec.cpp @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/sorted_id_vec.hpp" + +#include + +// --- SortedIdVec --- + +TEST_CASE("SortedIdVec empty state", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + REQUIRE(vec.size() == 0); + REQUIRE(vec.empty()); + REQUIRE_FALSE(vec.contains(0)); + REQUIRE_FALSE(vec.contains(42)); + REQUIRE(vec.data() == nullptr); +} + +TEST_CASE("SortedIdVec empty returns false after insert", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + vec.insert(1); + REQUIRE_FALSE(vec.empty()); +} + +TEST_CASE("SortedIdVec merge_from combines disjoint sets", "[sorted_id_vec]") +{ + auto a = pup::SortedIdVec {}; + a.insert(10); + a.insert(30); + + auto b = pup::SortedIdVec {}; + b.insert(20); + b.insert(40); + + a.merge_from(b); + REQUIRE(a.size() == 4); + REQUIRE(a.data()[0] == 10); + REQUIRE(a.data()[1] == 20); + REQUIRE(a.data()[2] == 30); + REQUIRE(a.data()[3] == 40); +} + +TEST_CASE("SortedIdVec merge_from deduplicates overlapping sets", "[sorted_id_vec]") +{ + auto a = pup::SortedIdVec {}; + a.insert(1); + a.insert(2); + a.insert(3); + + auto b = pup::SortedIdVec {}; + b.insert(2); + b.insert(4); + + a.merge_from(b); + REQUIRE(a.size() == 4); + REQUIRE(a.contains(1)); + REQUIRE(a.contains(2)); + REQUIRE(a.contains(3)); + 
REQUIRE(a.contains(4)); +} + +TEST_CASE("SortedIdVec merge_from from empty is no-op", "[sorted_id_vec]") +{ + auto a = pup::SortedIdVec {}; + a.insert(5); + + auto b = pup::SortedIdVec {}; + a.merge_from(b); + REQUIRE(a.size() == 1); + REQUIRE(a.contains(5)); +} + +TEST_CASE("SortedIdVec merge_from into empty", "[sorted_id_vec]") +{ + auto a = pup::SortedIdVec {}; + + auto b = pup::SortedIdVec {}; + b.insert(7); + b.insert(3); + + a.merge_from(b); + REQUIRE(a.size() == 2); + REQUIRE(a.data()[0] == 3); + REQUIRE(a.data()[1] == 7); +} + +TEST_CASE("SortedIdVec insert maintains sorted order", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + REQUIRE(vec.insert(30)); + REQUIRE(vec.insert(10)); + REQUIRE(vec.insert(20)); + + REQUIRE(vec.size() == 3); + REQUIRE(vec.data()[0] == 10); + REQUIRE(vec.data()[1] == 20); + REQUIRE(vec.data()[2] == 30); +} + +TEST_CASE("SortedIdVec duplicate insert is idempotent", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + REQUIRE(vec.insert(5)); + REQUIRE_FALSE(vec.insert(5)); + REQUIRE(vec.size() == 1); +} + +TEST_CASE("SortedIdVec contains", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + vec.insert(1); + vec.insert(100); + vec.insert(50); + + REQUIRE(vec.contains(1)); + REQUIRE(vec.contains(50)); + REQUIRE(vec.contains(100)); + REQUIRE_FALSE(vec.contains(0)); + REQUIRE_FALSE(vec.contains(51)); + REQUIRE_FALSE(vec.contains(200)); +} + +TEST_CASE("SortedIdVec remove", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + vec.insert(10); + vec.insert(20); + vec.insert(30); + + SECTION("remove existing returns true") + { + REQUIRE(vec.remove(20)); + REQUIRE(vec.size() == 2); + REQUIRE_FALSE(vec.contains(20)); + REQUIRE(vec.contains(10)); + REQUIRE(vec.contains(30)); + } + + SECTION("remove non-existing returns false") + { + REQUIRE_FALSE(vec.remove(99)); + REQUIRE(vec.size() == 3); + } + + SECTION("remove first element") + { + REQUIRE(vec.remove(10)); + REQUIRE(vec.size() == 2); + REQUIRE(vec.data()[0] == 
20); + } + + SECTION("remove last element") + { + REQUIRE(vec.remove(30)); + REQUIRE(vec.size() == 2); + REQUIRE(vec.data()[1] == 20); + } +} + +TEST_CASE("SortedIdVec clear", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + vec.insert(1); + vec.insert(2); + vec.insert(3); + + vec.clear(); + REQUIRE(vec.size() == 0); + REQUIRE_FALSE(vec.contains(1)); +} + +TEST_CASE("SortedIdVec for_each", "[sorted_id_vec]") +{ + auto vec = pup::SortedIdVec {}; + vec.insert(50); + vec.insert(10); + vec.insert(30); + + auto collected = std::vector {}; + vec.for_each( + [](std::uint32_t id, void* ctx) { + static_cast*>(ctx)->push_back(id); + }, + &collected + ); + + REQUIRE(collected == std::vector { 10, 30, 50 }); +} + +TEST_CASE("SortedIdVec move semantics", "[sorted_id_vec]") +{ + auto a = pup::SortedIdVec {}; + a.insert(1); + a.insert(2); + + SECTION("move constructor") + { + auto b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(b.contains(1)); + REQUIRE(b.contains(2)); + } + + SECTION("move assignment") + { + auto b = pup::SortedIdVec {}; + b.insert(99); + b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(b.contains(1)); + REQUIRE_FALSE(b.contains(99)); + } +} + +// --- SortedPairVec --- + +TEST_CASE("SortedPairVec empty state", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + REQUIRE(vec.size() == 0); + REQUIRE_FALSE(vec.contains(0)); + REQUIRE(vec.find(42) == nullptr); +} + +TEST_CASE("SortedPairVec insert and find", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + REQUIRE(vec.insert(30, 300)); + REQUIRE(vec.insert(10, 100)); + REQUIRE(vec.insert(20, 200)); + + REQUIRE(vec.size() == 3); + + auto const* v10 = vec.find(10); + auto const* v20 = vec.find(20); + auto const* v30 = vec.find(30); + + REQUIRE(v10 != nullptr); + REQUIRE(*v10 == 100); + REQUIRE(v20 != nullptr); + REQUIRE(*v20 == 200); + REQUIRE(v30 != nullptr); + REQUIRE(*v30 == 300); +} + +TEST_CASE("SortedPairVec overwrite existing key", "[sorted_pair_vec]") +{ + auto vec = 
pup::SortedPairVec {}; + REQUIRE(vec.insert(5, 50)); + REQUIRE_FALSE(vec.insert(5, 99)); + + REQUIRE(vec.size() == 1); + REQUIRE(*vec.find(5) == 99); +} + +TEST_CASE("SortedPairVec contains", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + vec.insert(10, 1); + vec.insert(20, 2); + + REQUIRE(vec.contains(10)); + REQUIRE(vec.contains(20)); + REQUIRE_FALSE(vec.contains(15)); +} + +TEST_CASE("SortedPairVec remove", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + vec.insert(10, 1); + vec.insert(20, 2); + vec.insert(30, 3); + + SECTION("remove existing returns true") + { + REQUIRE(vec.remove(20)); + REQUIRE(vec.size() == 2); + REQUIRE_FALSE(vec.contains(20)); + REQUIRE(vec.contains(10)); + REQUIRE(vec.contains(30)); + } + + SECTION("remove non-existing returns false") + { + REQUIRE_FALSE(vec.remove(99)); + REQUIRE(vec.size() == 3); + } +} + +TEST_CASE("SortedPairVec for_each iterates in key order", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + vec.insert(50, 5); + vec.insert(10, 1); + vec.insert(30, 3); + + auto collected = std::vector> {}; + vec.for_each( + [](std::uint32_t key, std::uint32_t value, void* ctx) { + static_cast>*>(ctx)->emplace_back(key, value); + }, + &collected + ); + + REQUIRE(collected.size() == 3); + REQUIRE(collected[0] == std::pair { 10, 1 }); + REQUIRE(collected[1] == std::pair { 30, 3 }); + REQUIRE(collected[2] == std::pair { 50, 5 }); +} + +TEST_CASE("SortedPairVec clear", "[sorted_pair_vec]") +{ + auto vec = pup::SortedPairVec {}; + vec.insert(1, 10); + vec.insert(2, 20); + + vec.clear(); + REQUIRE(vec.size() == 0); + REQUIRE_FALSE(vec.contains(1)); + REQUIRE(vec.find(1) == nullptr); +} + +TEST_CASE("SortedPairVec move semantics", "[sorted_pair_vec]") +{ + auto a = pup::SortedPairVec {}; + a.insert(1, 10); + a.insert(2, 20); + + SECTION("move constructor") + { + auto b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(*b.find(1) == 10); + REQUIRE(*b.find(2) == 20); + } + + SECTION("move assignment") 
+ { + auto b = pup::SortedPairVec {}; + b.insert(99, 99); + b = std::move(a); + REQUIRE(b.size() == 2); + REQUIRE(b.contains(1)); + REQUIRE_FALSE(b.contains(99)); + } +} diff --git a/test/unit/test_string_pool.cpp b/test/unit/test_string_pool.cpp new file mode 100644 index 0000000..67ab3f3 --- /dev/null +++ b/test/unit/test_string_pool.cpp @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024 Putup authors + +#include "catch_amalgamated.hpp" +#include "pup/core/string_pool.hpp" + +#include + +TEST_CASE("StringPool Robin Hood index", "[string_pool]") +{ + auto pool = pup::StringPool {}; + + SECTION("basic intern and get") + { + auto id = pool.intern("hello"); + REQUIRE(pool.get(id) == "hello"); + } + + SECTION("deduplication") + { + auto id1 = pool.intern("same"); + auto id2 = pool.intern("same"); + REQUIRE(id1 == id2); + REQUIRE(pool.size() == 1); + } + + SECTION("find existing") + { + (void)pool.intern("exists"); + REQUIRE(pool.find("exists") != pup::StringId::Empty); + REQUIRE(pool.find("missing") == pup::StringId::Empty); + } + + SECTION("empty string") + { + auto id = pool.intern(""); + REQUIRE(id == pup::StringId::Empty); + REQUIRE(pool.get(pup::StringId::Empty) == ""); + } + + SECTION("stress: 10K unique strings") + { + for (auto i = 0; i < 10000; ++i) { + char buf[32]; + std::snprintf(buf, sizeof(buf), "var_%d", i); + auto id = pool.intern(buf); + REQUIRE(pool.get(id) == buf); + } + REQUIRE(pool.size() == 10000); + + for (auto i = 0; i < 10000; ++i) { + char buf[32]; + std::snprintf(buf, sizeof(buf), "var_%d", i); + REQUIRE(pool.find(buf) != pup::StringId::Empty); + } + } + + SECTION("stress: 10K duplicate interns") + { + (void)pool.intern("repeated"); + for (auto i = 0; i < 10000; ++i) { + (void)pool.intern("repeated"); + } + REQUIRE(pool.size() == 1); + } + + SECTION("reserve pre-allocates") + { + pool.reserve(1000); + for (auto i = 0; i < 1000; ++i) { + char buf[32]; + std::snprintf(buf, sizeof(buf), "r_%d", i); + 
(void)pool.intern(buf); + } + REQUIRE(pool.size() == 1000); + } + + SECTION("clear resets everything") + { + (void)pool.intern("a"); + (void)pool.intern("b"); + pool.clear(); + REQUIRE(pool.size() == 0); + REQUIRE(pool.find("a") == pup::StringId::Empty); + } + + SECTION("move semantics") + { + (void)pool.intern("before_move"); + auto id = pool.find("before_move"); + + auto moved = std::move(pool); + REQUIRE(moved.get(id) == "before_move"); + REQUIRE(moved.find("before_move") == id); + } +} diff --git a/test/unit/test_target.cpp b/test/unit/test_target.cpp index 461d8fd..b06a540 100644 --- a/test/unit/test_target.cpp +++ b/test/unit/test_target.cpp @@ -48,7 +48,7 @@ class TempDir { } // namespace -SCENARIO("Target parsing - variant detection", "[target]") +SCENARIO("Target parsing - variant detection", "[e2e][target]") { GIVEN("a project with build-debug/tup.config") { @@ -59,7 +59,7 @@ SCENARIO("Target parsing - variant detection", "[target]") WHEN("parsing 'build-debug'") { - auto result = pup::parse_target(tmp.path(), "build-debug"); + auto result = pup::parse_target(tmp.path().string(), "build-debug"); THEN("variant=build-debug, scope=empty, is_output=false") { @@ -74,7 +74,7 @@ SCENARIO("Target parsing - variant detection", "[target]") WHEN("parsing 'build-debug/src/lib'") { tmp.create_dir("build-debug/src/lib"); - auto result = pup::parse_target(tmp.path(), "build-debug/src/lib"); + auto result = pup::parse_target(tmp.path().string(), "build-debug/src/lib"); THEN("variant=build-debug, scope=src/lib, is_output=false") { @@ -89,7 +89,7 @@ SCENARIO("Target parsing - variant detection", "[target]") WHEN("parsing 'build-debug/src/lib/foo.o'") { tmp.create_file("build-debug/src/lib/foo.o"); - auto result = pup::parse_target(tmp.path(), "build-debug/src/lib/foo.o"); + auto result = pup::parse_target(tmp.path().string(), "build-debug/src/lib/foo.o"); THEN("variant=build-debug, scope=src/lib/foo.o, is_output=true") { @@ -103,7 +103,7 @@ SCENARIO("Target parsing - 
variant detection", "[target]") WHEN("parsing 'src/lib'") { - auto result = pup::parse_target(tmp.path(), "src/lib"); + auto result = pup::parse_target(tmp.path().string(), "src/lib"); THEN("variant=nullopt, scope=src/lib, is_output=false") { @@ -117,7 +117,7 @@ SCENARIO("Target parsing - variant detection", "[target]") WHEN("parsing 'src/lib/foo.o'") { tmp.create_file("src/lib/foo.o"); - auto result = pup::parse_target(tmp.path(), "src/lib/foo.o"); + auto result = pup::parse_target(tmp.path().string(), "src/lib/foo.o"); THEN("variant=nullopt, scope=src/lib/foo.o, is_output=true") { @@ -130,7 +130,7 @@ SCENARIO("Target parsing - variant detection", "[target]") } } -SCENARIO("Target parsing - glob expansion", "[target]") +SCENARIO("Target parsing - glob expansion", "[e2e][target]") { GIVEN("variants build-debug and build-release") { @@ -142,7 +142,7 @@ SCENARIO("Target parsing - glob expansion", "[target]") WHEN("parsing 'build-*'") { - auto result = pup::expand_glob_target(tmp.path(), "build-*"); + auto result = pup::expand_glob_target(tmp.path().string(), "build-*"); THEN("expands to [build-debug, build-release]") { @@ -150,7 +150,7 @@ SCENARIO("Target parsing - glob expansion", "[target]") auto variants = std::set {}; for (auto const& t : result) { REQUIRE(t.variant.has_value()); - variants.insert(t.variant->string()); + variants.insert(*t.variant); } REQUIRE(variants.count("build-debug") == 1); REQUIRE(variants.count("build-release") == 1); @@ -161,7 +161,7 @@ SCENARIO("Target parsing - glob expansion", "[target]") { tmp.create_dir("build-debug/src/lib"); tmp.create_dir("build-release/src/lib"); - auto result = pup::expand_glob_target(tmp.path(), "build-*/src/lib"); + auto result = pup::expand_glob_target(tmp.path().string(), "build-*/src/lib"); THEN("expands to [(build-debug, src/lib), (build-release, src/lib)]") { @@ -178,7 +178,7 @@ SCENARIO("Target parsing - glob expansion", "[target]") { tmp.create_file("build-debug/src/lib/foo.o"); 
tmp.create_file("build-release/src/lib/foo.o"); - auto result = pup::expand_glob_target(tmp.path(), "build-*/src/lib/foo.o"); + auto result = pup::expand_glob_target(tmp.path().string(), "build-*/src/lib/foo.o"); THEN("expands to [(build-debug, src/lib/foo.o), (build-release, src/lib/foo.o)]") { @@ -193,7 +193,7 @@ SCENARIO("Target parsing - glob expansion", "[target]") } } -SCENARIO("Target parsing - error cases", "[target]") +SCENARIO("Target parsing - error cases", "[e2e][target]") { GIVEN("a project with source file src/lib/foo.c") { @@ -203,7 +203,7 @@ SCENARIO("Target parsing - error cases", "[target]") WHEN("parsing 'src/lib/foo.c' as output target") { - auto result = pup::parse_target(tmp.path(), "src/lib/foo.c"); + auto result = pup::parse_target(tmp.path().string(), "src/lib/foo.c"); THEN("returns error: source file not build output") { @@ -220,7 +220,7 @@ SCENARIO("Target parsing - error cases", "[target]") WHEN("parsing 'no_such_dir/nonexistent'") { - auto result = pup::parse_target(tmp.path(), "no_such_dir/nonexistent"); + auto result = pup::parse_target(tmp.path().string(), "no_such_dir/nonexistent"); THEN("returns error: path not found") { @@ -237,7 +237,7 @@ SCENARIO("Target parsing - error cases", "[target]") WHEN("parsing 'nonexistent'") { - auto result = pup::parse_target(tmp.path(), "nonexistent"); + auto result = pup::parse_target(tmp.path().string(), "nonexistent"); THEN("treats as potential output target (validation deferred to build)") { @@ -248,7 +248,7 @@ SCENARIO("Target parsing - error cases", "[target]") } } -SCENARIO("Target parsing - consistency rule", "[target]") +SCENARIO("Target parsing - consistency rule", "[e2e][target]") { GIVEN("variants build-debug and build-release") { @@ -264,7 +264,7 @@ SCENARIO("Target parsing - consistency rule", "[target]") tmp.create_dir("build-debug/src"); tmp.create_dir("src/test"); auto targets = std::vector { "build-debug/src", "src/test" }; - auto result = 
pup::validate_target_consistency(tmp.path(), targets); + auto result = pup::validate_target_consistency(tmp.path().string(), targets); THEN("returns error: cannot mix variant-specific and all-variant targets") { @@ -276,7 +276,7 @@ SCENARIO("Target parsing - consistency rule", "[target]") WHEN("parsing ['build-debug', 'src']") { auto targets = std::vector { "build-debug", "src" }; - auto result = pup::validate_target_consistency(tmp.path(), targets); + auto result = pup::validate_target_consistency(tmp.path().string(), targets); THEN("returns error: cannot mix variant-specific and all-variant targets") { @@ -290,7 +290,7 @@ SCENARIO("Target parsing - consistency rule", "[target]") tmp.create_dir("build-debug/src"); tmp.create_dir("build-release/test"); auto targets = std::vector { "build-debug/src", "build-release/test" }; - auto result = pup::validate_target_consistency(tmp.path(), targets); + auto result = pup::validate_target_consistency(tmp.path().string(), targets); THEN("succeeds: both have explicit variants") { @@ -301,7 +301,7 @@ SCENARIO("Target parsing - consistency rule", "[target]") WHEN("parsing ['src', 'test']") { auto targets = std::vector { "src", "test" }; - auto result = pup::validate_target_consistency(tmp.path(), targets); + auto result = pup::validate_target_consistency(tmp.path().string(), targets); THEN("succeeds: neither has explicit variant") {