diff --git a/pineforge_codegen/analyzer/base.py b/pineforge_codegen/analyzer/base.py index 43007e7..52106d3 100644 --- a/pineforge_codegen/analyzer/base.py +++ b/pineforge_codegen/analyzer/base.py @@ -127,6 +127,20 @@ def __init__(self, ast: Program, filename: str = "") -> None: self._global_var_decls: list[tuple[str, PineType]] = [] self._global_expr_map: dict[str, Any] = {} self._var_member_init_exprs: dict[str, Any] = {} + # Block-scoped ``var``/``varip`` name-collision disambiguation. + # Two same-named block-scoped vars in SIBLING non-global, non-function + # scopes (e.g. ``var bool valid`` declared inside ``if A`` and again + # inside ``if B``) would otherwise dedupe to ONE C++ member and + # cross-contaminate. ``_block_node_stack`` tracks the enclosing + # block AST nodes during analysis; ``_block_var_owner`` maps a raw + # block-var name to the id() of the FIRST block that declared it; + # ``_block_var_renames`` maps id(block_node) -> {raw_name: unique} + # for every later colliding block so codegen can activate the + # rename via ``_active_var_remap`` while emitting that block. + self._block_node_stack: list[Any] = [] + self._block_var_owner: dict[str, int] = {} + self._block_var_renames: dict[int, dict[str, str]] = {} + self._block_var_seq = 0 self._ta_counter = 0 self._fixnan_counter = 0 # Track user-defined function nodes for deferred analysis @@ -305,6 +319,7 @@ def analyze(self) -> AnalyzerContext: udt_var_types=dict(self._udt_var_types), collection_types=dict(self._collection_types), udt_field_type_specs=dict(self._udt_field_type_specs), + block_var_renames=dict(self._block_var_renames), ) def _record_global_binding_stmt(self, name: str, pine_type: PineType, @@ -750,14 +765,41 @@ def _visit_VarDecl(self, node: VarDecl) -> PineType: # Track var members if node.is_var or node.is_varip: init_str = self._expr_to_str(node.value) - self._var_members.append((node.name, val_type, init_str)) + scope_name = self._symbols.current_scope.name + # Block-scoped var name-collision disambiguation. A ``var``/``varip`` + # declared inside a non-global, non-function block (an ``if`` / ``for`` + # / ``while`` body at on_bar scope) is keyed by RAW name. Two sibling + # blocks declaring the same name would dedupe to ONE C++ member and + # cross-contaminate (proven: egoigor1976-1-trendline-strategy's + # ``var bool valid`` in the upper- and lower-trendline ``if`` blocks). + # When such a name already belongs to a DIFFERENT block, mint a + # scope-unique member name and record the rename so codegen activates + # it (via ``_active_var_remap``) while emitting that block. + member_name = node.name + is_block_scoped = ( + not self._global_scope + and not scope_name.startswith("func_") + and not scope_name.startswith("method_") + and bool(self._block_node_stack) + ) + if is_block_scoped: + block_id = id(self._block_node_stack[-1]) + owner = self._block_var_owner.get(node.name) + if owner is None: + # First block to claim this name keeps the raw member name. + self._block_var_owner[node.name] = block_id + elif owner != block_id: + # Sibling-scope collision: disambiguate this declaration. + self._block_var_seq += 1 + member_name = f"{node.name}__blk{self._block_var_seq}" + self._block_var_renames.setdefault(block_id, {})[node.name] = member_name + self._var_members.append((member_name, val_type, init_str)) # Capture the init AST too so codegen can inspect the RHS callee # (used to detect int64-returning builtins like ``time()`` and # promote the symbol storage type to ``int64_t``). if node.value is not None: - self._var_member_init_exprs[node.name] = node.value + self._var_member_init_exprs[member_name] = node.value # Track function-scoped var members - scope_name = self._symbols.current_scope.name if scope_name.startswith("func_"): func_name = scope_name[5:] # strip "func_" prefix if func_name not in self._func_var_members: @@ -1127,6 +1169,7 @@ def _visit_MethodDef(self, node) -> PineType: def _visit_IfStmt(self, node: IfStmt) -> PineType: old_global = self._global_scope self._global_scope = False + self._block_node_stack.append(node) try: self._visit(node.condition) body_type = PineType.VOID @@ -1135,6 +1178,7 @@ def _visit_IfStmt(self, node: IfStmt) -> PineType: for stmt in node.else_body: self._visit(stmt) finally: + self._block_node_stack.pop() self._global_scope = old_global # If used as expression (x = if ...), return last expr type return body_type @@ -1158,6 +1202,7 @@ def _visit_ForStmt(self, node: ForStmt) -> PineType: old_global = self._global_scope self._global_scope = False + self._block_node_stack.append(node) try: self._visit(node.start) self._visit(node.end) @@ -1166,6 +1211,7 @@ def _visit_ForStmt(self, node: ForStmt) -> PineType: for stmt in node.body: self._visit(stmt) finally: + self._block_node_stack.pop() self._global_scope = old_global self._symbols.exit_scope() @@ -1174,6 +1220,7 @@ def _visit_ForStmt(self, node: ForStmt) -> PineType: def _visit_ForInStmt(self, node) -> PineType: old_global = self._global_scope self._global_scope = False + self._block_node_stack.append(node) try: self._visit(node.iterable) self._symbols.enter_scope("for_in") @@ -1196,17 +1243,20 @@ def _visit_ForInStmt(self, node) -> PineType: self._visit(stmt) self._symbols.exit_scope() finally: + self._block_node_stack.pop() self._global_scope = old_global return PineType.VOID def _visit_WhileStmt(self, node: WhileStmt) -> PineType: old_global = self._global_scope self._global_scope = False + self._block_node_stack.append(node) try: self._visit(node.condition) for stmt in node.body: self._visit(stmt) finally: + self._block_node_stack.pop() self._global_scope = old_global return PineType.VOID diff --git a/pineforge_codegen/analyzer/contracts.py b/pineforge_codegen/analyzer/contracts.py index 22dfaa9..552d3a5 100644 --- a/pineforge_codegen/analyzer/contracts.py +++ b/pineforge_codegen/analyzer/contracts.py @@ -179,6 +179,12 @@ class AnalyzerContext: collection_types: dict[str, TypeSpec] = field(default_factory=dict) # UDT name -> field_name -> structured type metadata udt_field_type_specs: dict[str, dict[str, TypeSpec]] = field(default_factory=dict) + # id(block_node) -> {raw_var_name: scope_unique_member_name} for block-scoped + # ``var``/``varip`` declarations whose raw name collides with a same-named + # block var in a sibling scope. Codegen activates the rename via + # ``_active_var_remap`` while emitting that block's statements so reads/writes + # of the var resolve to the disambiguated member. + block_var_renames: dict[int, dict[str, str]] = field(default_factory=dict) # ``// @pf-trace name=expr`` pragmas in source order. Populated by # :func:`pineforge_codegen.pragmas.extract_pf_trace_pragmas` from # the original source text and attached after :class:`Analyzer` diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py index 1d4aed1..31a577c 100644 --- a/pineforge_codegen/codegen/base.py +++ b/pineforge_codegen/codegen/base.py @@ -394,6 +394,10 @@ def __init__(self, ctx: AnalyzerContext) -> None: self._global_mutable_infos: dict[str, object] = getattr(ctx, "global_mutable_infos", {}) or {} self._udt_var_types: dict[str, str] = getattr(ctx, "udt_var_types", {}) or {} self._collection_types: dict[str, TypeSpec] = getattr(ctx, "collection_types", {}) or {} + # id(block_node) -> {raw_var_name: unique_member} for block-scoped var + # name collisions (see Analyzer._visit_VarDecl). Activated into + # ``_active_var_remap`` while emitting the owning block's statements. + self._block_var_renames: dict[int, dict[str, str]] = getattr(ctx, "block_var_renames", {}) or {} self._udt_field_type_specs: dict[str, dict[str, TypeSpec]] = getattr(ctx, "udt_field_type_specs", {}) or {} # Map UDT struct name -> set of field names that were dropped from the # emitted C++ struct because they had drawing-only types (label, line, diff --git a/pineforge_codegen/codegen/types.py b/pineforge_codegen/codegen/types.py index a8a153e..367d682 100644 --- a/pineforge_codegen/codegen/types.py +++ b/pineforge_codegen/codegen/types.py @@ -51,6 +51,23 @@ TA_RETURNS_BOOL, ) +# Collection (array / map / matrix) methods that MUTATE the receiver in place. +# Used by the BUG-2 collection-lvalue-alias path to decide whether a local +# bound to an existing collection lvalue must alias (mutated) or may value-copy +# (read-only). A method missing here only costs the alias optimization; a +# non-mutating method accidentally present would alias a read-only local, which +# is still correct (reads through a reference equal reads through a copy). +COLLECTION_MUTATING_METHODS = frozenset({ + # array + "push", "unshift", "insert", "remove", "pop", "shift", "clear", + "set", "fill", "sort", "reverse", "concat", + # map + "put", "put_all", + # matrix + "add_row", "add_col", "remove_row", "remove_col", "reshape", + "swap_rows", "swap_columns", +}) + class TypeInferer: """Type-spec / C++-type inference helpers shared across visitor mixins. @@ -594,6 +611,89 @@ def _udt_local_alias_kind(self, node: VarDecl) -> tuple[str, str] | None: return None return ("ptr" if rebinds_to_other_lvalue else "ref"), udt_t + # ------------------------------------------------------------------ + # BUG 2: collection (array / map / matrix) lvalue aliasing + # ------------------------------------------------------------------ + + def _collection_lvalue_spec(self, expr): + """If ``expr`` is a bare ``Identifier`` naming an array/map/matrix + var/global member, return its ``TypeSpec``; else ``None``. Pine + collections are reference types, so a local bound to such an lvalue and + then mutated through must ALIAS it, not value-copy.""" + if not isinstance(expr, Identifier): + return None + name = expr.name + if name in self._matrix_specs: + return self._matrix_specs[name] + spec = self._collection_types.get(name) + if spec is not None and spec.kind in ("array", "map", "matrix"): + return spec + if name in self._array_vars: + return self._array_spec_for_name(name) + if name in self._map_vars: + return self._map_spec_for_name(name) + return None + + def _collection_lvalue_selection_spec(self, expr): + """``TypeSpec`` if ``expr`` is a collection lvalue OR a ternary/switch + whose every selectable branch is a collection lvalue of the SAME C++ + type; ``None`` otherwise (so ``array.new(...)`` ctors, copies, function + returns, and mixed selections keep value-copy semantics). Mirrors + ``_udt_lvalue_selection_type`` for the BUG-2 collection-alias path.""" + direct = self._collection_lvalue_spec(expr) + if direct is not None: + return direct + branches: list = [] + if isinstance(expr, Ternary): + branches = [expr.true_val, expr.false_val] + elif isinstance(expr, SwitchStmt): + for _case_expr, stmts in (expr.cases or []): + if not stmts: + return None + last = stmts[-1] + branches.append(last.expr if isinstance(last, ExprStmt) else last) + if expr.default_body: + last = expr.default_body[-1] + branches.append(last.expr if isinstance(last, ExprStmt) else last) + else: + return None + if not branches: + return None + specs = [self._collection_lvalue_spec(b) for b in branches] + if any(s is None for s in specs): + return None + cpp_types = {self._type_spec_to_cpp(s) for s in specs} + if len(cpp_types) == 1: + return specs[0] + return None + + def _collection_local_must_alias(self, node) -> bool: + """True when the local ``node`` declares an alias of an existing + collection lvalue that is later MUTATED in the enclosing function body + (``local.push/unshift/insert/remove/set/clear/pop/...``). A purely-read + local needn't alias; a local REASSIGNED to a different value can't be a + C++ reference, so it bails to value-copy (returns ``False``).""" + from ..ast_nodes import Assignment + body = getattr(self, "_current_func_body", None) + if body is None: + return False + name = node.name + mutated = False + for stmt in self._walk_ast_list(body): + # Rebind of the local itself (``orderBlocks := other``) — a C++ + # reference cannot rebind, so keep value-copy semantics. + if (isinstance(stmt, Assignment) + and isinstance(stmt.target, Identifier) + and stmt.target.name == name): + return False + if (isinstance(stmt, FuncCall) + and isinstance(stmt.callee, MemberAccess) + and isinstance(stmt.callee.object, Identifier) + and stmt.callee.object.name == name + and stmt.callee.member in COLLECTION_MUTATING_METHODS): + mutated = True + return mutated + def _walk_ast_list(self, stmts): """Yield every node within a list of statements (depth-first).""" for s in stmts: diff --git a/pineforge_codegen/codegen/visit_stmt.py b/pineforge_codegen/codegen/visit_stmt.py index 93db171..be98154 100644 --- a/pineforge_codegen/codegen/visit_stmt.py +++ b/pineforge_codegen/codegen/visit_stmt.py @@ -106,6 +106,10 @@ MATRIX_RETURNING_METHODS, ) +# Sentinel for "no block-scoped var remap was activated" so an empty dict +# saved-remap is still distinguishable from the no-op case. +_NO_BLOCK_REMAP = object() + class StmtVisitor: """Statement-level visitor methods shared across the codegen. @@ -422,6 +426,31 @@ def _visit_var_decl(self, node: VarDecl, lines: list[str], pad: str) -> None: lines.append(f"{pad}{udt_t}* {safe} = {cpp_val};") return + # Collection lvalue alias (BUG 2): a local bound to an existing array / + # map / matrix lvalue (or a ternary/switch selecting same-typed ones) + # and later MUTATED through must ALIAS the member, not value-copy — Pine + # collections are reference types. Proven: jevondijefferson-big-breakout + # does ``array orderBlocks = internal ? internalOrderBlocks + # : swingOrderBlocks`` then ``orderBlocks.unshift(ob)`` in three helpers; + # the value-copy left the member arrays empty. Emit a non-rebinding C++ + # reference instead. + if not is_global_member: + coll_spec = self._collection_lvalue_selection_spec(node.value) + if coll_spec is not None and self._collection_local_must_alias(node): + # Register the local's collection kind so subsequent + # ``.size()/.get()/.unshift()`` dispatch resolves correctly. + self._collection_types[node.name] = coll_spec + if coll_spec.kind == "array": + self._array_vars.add(node.name) + elif coll_spec.kind == "map": + self._map_vars.add(node.name) + elif coll_spec.kind == "matrix": + self._matrix_specs[node.name] = coll_spec + cpp_type = self._type_spec_to_cpp(coll_spec) + cpp_val = self._visit_rhs_value(node.value, node.name, target_cpp_type=cpp_type) + lines.append(f"{pad}{cpp_type}& {safe} = {cpp_val};") + return + # General declaration cpp_type = self._type_for_decl(node) if not is_global_member else None cpp_val = self._visit_rhs_value(node.value, node.name, target_cpp_type=cpp_type) @@ -627,7 +656,30 @@ def _visit_tuple_assign(self, node: TupleAssign, lines: list[str], pad: str) -> lines.append(f"{pad}/* unsupported tuple assignment */") + def _push_block_var_remap(self, node): + """Activate block-scoped var renames for ``node`` (BUG 1). Returns the + previous ``_active_var_remap`` to restore (or ``_NO_BLOCK_REMAP`` if this + block owns no renames). Renames are MERGED over the inherited remap so + nested blocks keep any enclosing func-clone / outer-block mapping.""" + renames = self._block_var_renames.get(id(node)) + if not renames: + return _NO_BLOCK_REMAP + saved = self._active_var_remap + self._active_var_remap = {**saved, **renames} + return saved + + def _pop_block_var_remap(self, saved) -> None: + if saved is not _NO_BLOCK_REMAP: + self._active_var_remap = saved + def _visit_if(self, node: IfStmt, lines: list[str], indent: int) -> None: + _blk_saved = self._push_block_var_remap(node) + try: + self._visit_if_body(node, lines, indent) + finally: + self._pop_block_var_remap(_blk_saved) + + def _visit_if_body(self, node: IfStmt, lines: list[str], indent: int) -> None: pad = " " * indent # TA hoisting: inside per-call-site function variants, execute ALL @@ -673,8 +725,12 @@ def _visit_for(self, node: ForStmt, lines: list[str], indent: int) -> None: self._current_loop_vars = set(self._current_loop_vars) if var: self._current_loop_vars.add(var) - for s in node.body: - self._visit_stmt(s, lines, indent + 1) + _blk_saved = self._push_block_var_remap(node) + try: + for s in node.body: + self._visit_stmt(s, lines, indent + 1) + finally: + self._pop_block_var_remap(_blk_saved) self._current_loop_vars = saved_loop lines.append(f"{pad}}}") @@ -695,8 +751,12 @@ def _visit_for_in(self, node, lines: list[str], indent: int) -> None: elif node.vars: bindings = ", ".join(node.vars) lines.append(f"{pad}for (auto [{bindings}] : {iterable}) {{") - for s in node.body: - self._visit_stmt(s, lines, indent + 1) + _blk_saved = self._push_block_var_remap(node) + try: + for s in node.body: + self._visit_stmt(s, lines, indent + 1) + finally: + self._pop_block_var_remap(_blk_saved) lines.append(f"{pad}}}") self._current_loop_vars = saved_loop @@ -704,8 +764,12 @@ def _visit_while(self, node: WhileStmt, lines: list[str], indent: int) -> None: pad = " " * indent cond = self._visit_expr(node.condition) lines.append(f"{pad}while ({cond}) {{") - for s in node.body: - self._visit_stmt(s, lines, indent + 1) + _blk_saved = self._push_block_var_remap(node) + try: + for s in node.body: + self._visit_stmt(s, lines, indent + 1) + finally: + self._pop_block_var_remap(_blk_saved) lines.append(f"{pad}}}") def _visit_switch(self, node: SwitchStmt, lines: list[str], indent: int) -> None: diff --git a/tests/test_codegen_new.py b/tests/test_codegen_new.py index f2b33d2..7c3b790 100644 --- a/tests/test_codegen_new.py +++ b/tests/test_codegen_new.py @@ -1385,3 +1385,93 @@ def test_nested_helper_multi_path_distinct_var_state(): assert len(var_used) == len(set(var_used)), ( f"two leg clones share the `var int l` state member: {var_used}" ) + + +# === Regression: block-scoped var name collision (BUG 1) === +# egoigor1976-1-trendline-strategy declared `var bool valid` inside two sibling +# top-level `if` blocks; both deduped to ONE C++ member and cross-contaminated. +# Block-scoped vars colliding by raw name across sibling scopes must now mint a +# distinct member each. (44%->100% price-exact once disambiguated.) + + +def test_block_scoped_var_collision_emits_distinct_members(): + cpp = _generate(""" +//@version=6 +strategy("T") +if close > open + var int counter = 0 + counter := counter + 1 +if close < open + var int counter = 0 + counter := counter + 1 +""") + # Two sibling-block `var int counter`s -> two distinct C++ members. + assert "int counter;" in cpp + assert "int counter__blk1;" in cpp, ( + "second sibling-block `var int counter` must get a scope-unique member; " + "without disambiguation both blocks share one member and cross-contaminate" + ) + # Both members get their own ctor init. + assert "counter(0)" in cpp and "counter__blk1(0)" in cpp + # The first (upper) block keeps the raw name; the second (lower) is remapped. + assert "counter = (counter + 1)" in cpp + assert "counter__blk1 = (counter__blk1 + 1)" in cpp + + +def test_block_scoped_var_no_collision_keeps_raw_name(): + # A single block-scoped var (no sibling collision) must NOT be renamed. + cpp = _generate(""" +//@version=6 +strategy("T") +if close > open + var int counter = 0 + counter := counter + 1 +""") + assert "int counter;" in cpp + assert "__blk" not in cpp, "no-op expected when there is no sibling-scope collision" + + +# === Regression: collection lvalue aliasing value-copied (BUG 2) === +# jevondijefferson-big-breakout-strategy bound a local to an existing member +# array via a ternary then mutated it (`orderBlocks.unshift(...)`); the +# value-copy meant the member array never grew. A mutated local aliasing an +# existing collection lvalue must emit a non-rebinding C++ reference. +# (36%->77% once aliased.) + + +def test_collection_lvalue_alias_emits_reference_when_mutated(): + cpp = _generate(""" +//@version=6 +strategy("T") +var array a = array.new() +var array b = array.new() +pick(bool which) => + array sel = which ? a : b + sel.push(close) + array.size(sel) +n = pick(close > open) +""") + # The mutated local aliases the selected member -> reference, not a copy. + assert "std::vector& sel = " in cpp, ( + "a local collection aliasing a member array and then mutated must emit " + "a `&` reference; a value-copy never mutates the member" + ) + # Sanity: the (buggy) value-copy form must be gone. + assert "std::vector sel = " not in cpp + + +def test_collection_lvalue_alias_readonly_stays_value_copy(): + # A read-only local bound to a member array is NOT aliased (no mutation), + # so the conservative value-copy is preserved (no-op guarantee). + cpp = _generate(""" +//@version=6 +strategy("T") +var array a = array.new() +var array b = array.new() +peek(bool which) => + array sel = which ? a : b + array.size(sel) +n = peek(close > open) +""") + assert "std::vector& sel = " not in cpp + assert "std::vector sel = " in cpp