From 90e1a2cac1f99a771aa897d42d3331b3c343032c Mon Sep 17 00:00:00 2001 From: luisleo526 Date: Sun, 28 Jun 2026 14:37:41 +0800 Subject: [PATCH] fix(parser): support simple/series/const UDF param qualifiers and T[] postfix-array decls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two parser defects surfaced while transpiling real-world published v6 strategies: 1. UDF parameter type qualifiers were not consumed. `simple string maType` (and `const`) were mis-parsed: only `series` was special-cased, so `f(..., simple string m)` split into two params (`simple`, `m`) and the generated C++ failed to compile (no matching function). Now any leading `series`/`simple`/`const` qualifier is consumed before the type — this also fixes UDT-typed series/simple params. 2. Postfix-array declarations (`float[] x = ...`, `var int[] xs = ...`) were silently dropped. `_parse_type_hint_string` didn't consume the trailing `[]`, so the following name failed to parse and the whole declaration was discarded by error recovery, leaving the variable undeclared downstream ("Unknown variable"). The `[]` postfix now lowers to `array<T>`, and the statement dispatcher recognizes the bare `T[] name =` form. Adds 5 regression tests. Verified no corpus regression: re-transpiling all 258 corpus probes with and without this change yields byte-identical generated.cpp (0 drift); a full local engine build + run of the 246-probe sweep reproduces the canonical 245-excellent / 1-anomaly parity. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pineforge_codegen/parser.py | 94 +++++++++++++++++++++---------------- tests/test_parser.py | 56 ++++++++++++++++++++++ 2 files changed, 110 insertions(+), 40 deletions(-) diff --git a/pineforge_codegen/parser.py b/pineforge_codegen/parser.py index c3220d8..3ddf301 100644 --- a/pineforge_codegen/parser.py +++ b/pineforge_codegen/parser.py @@ -197,6 +197,13 @@ def _parse_statement(self): # Check that the IDENT is followed by = (not == ) to confirm declaration if self._peek(2).type == TokenType.EQUALS: return self._parse_typed_decl() + # Postfix-array type-annotated declaration: float[] x = ..., int[] x = ... + if (cur.type in TYPE_KEYWORDS + and self._peek().type == TokenType.LBRACKET + and self._peek(2).type == TokenType.RBRACKET + and self._peek(3).type == TokenType.IDENT + and self._peek(4).type == TokenType.EQUALS): + return self._parse_typed_decl() # IDENT-prefixed type-annotated declaration: ``Sample s = ...``, # ``array arr = ...``, ``matrix m = ...`` — when the @@ -393,35 +400,42 @@ def _parse_assignment(self) -> Assignment: return self._set_loc(node, start_tok) def _parse_type_hint_string(self) -> str: - """Parse primitive, UDT, array, or map type hints.""" + """Parse primitive, UDT, array, map, or postfix-array (``T[]``) hints.""" base = self._advance().value - if not self._check(TokenType.LT): - return base - - parts: list[str] = [] - depth = 0 - self._advance() # < - while not self._at_end(): - tok = self._current() - if tok.type == TokenType.LT: - depth += 1 - parts.append("<") - self._advance() - continue - if tok.type == TokenType.GT: - if depth == 0: + if self._check(TokenType.LT): + parts: list[str] = [] + depth = 0 + self._advance() # < + while not self._at_end(): + tok = self._current() + if tok.type == TokenType.LT: + depth += 1 + parts.append("<") self._advance() - break - depth -= 1 - parts.append(">") + continue + if tok.type == TokenType.GT: + if depth == 0: + self._advance() + break + depth -= 
1 + parts.append(">") + self._advance() + continue + if tok.type == TokenType.COMMA: + parts.append(",") + else: + parts.append(str(tok.value)) self._advance() - continue - if tok.type == TokenType.COMMA: - parts.append(",") - else: - parts.append(str(tok.value)) - self._advance() - return f"{base}<{''.join(parts)}>" + base = f"{base}<{''.join(parts)}>" + + # Pine postfix-array shorthand: `float[]` == `array`, `T[]` == `array`. + # Without this the trailing `[ ]` is left unconsumed, the following name + # fails to parse, and the whole declaration is silently dropped. + while self._check(TokenType.LBRACKET) and self._peek().type == TokenType.RBRACKET: + self._advance() # [ + self._advance() # ] + base = f"array<{base}>" + return base def _parse_template_args(self) -> list[str]: """Parse and return generic args after a member name, e.g. new().""" @@ -561,24 +575,24 @@ def _parse_func_def(self) -> FuncDef: TokenType.TYPE_BOOL, TokenType.TYPE_STRING} params = [] while not self._check(TokenType.RPAREN): - # Pine: series float x / series int x — one parameter (not "series" + "x") - if self._check(TokenType.IDENT) and self._current().value == "series": - self._advance() # consume 'series' - if self._current().type in TYPE_TOKENS: - self._advance() # float, int, ... - param_name = self._consume(TokenType.IDENT).value - if self._check(TokenType.EQUALS): - self._advance() - self._parse_expression() - params.append(param_name) - self._match(TokenType.COMMA) - continue - # Handle optional type annotation: type param (e.g., int len, float src) + # Consume optional Pine parameter type qualifiers, e.g. `series float x`, + # `simple string maType`, `const int n`. Each is one qualifier in front + # of the (optional) type and the name — NOT a separate parameter. + while self._check(TokenType.IDENT) and self._current().value in ( + "series", + "simple", + "const", + ): + self._advance() + # Handle optional type annotation: type param (e.g., int len, float src, + # string s). 
Built-in types are dedicated tokens; user-defined types are + # IDENTs and handled by the "next is IDENT" check below. if self._current().type in TYPE_TOKENS: self._advance() # skip the type annotation param_name = self._consume(TokenType.IDENT).value if self._check(TokenType.IDENT): - # 'param_name' was actually a type name parsed as IDENT, next is real name + # 'param_name' was actually a (user-defined) type name parsed as IDENT, + # next is the real name. param_name = self._consume(TokenType.IDENT).value # Skip default value: param = expr if self._check(TokenType.EQUALS): diff --git a/tests/test_parser.py b/tests/test_parser.py index 9b40583..d02d527 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -340,3 +340,59 @@ def test_parse_basic_strategy(pine_file): prog = _parse(src) assert isinstance(prog, Program) assert len(prog.body) > 0 + + +# === Regression: UDF param qualifiers + postfix-array declarations === + +def test_udf_param_simple_qualifier_is_one_param(): + """`simple string m` is a single qualified param, not two params.""" + prog = _parse( + "//@version=6\nstrategy(\"t\")\n" + "ma(float s, int l, simple string m) =>\n ta.ema(s, l)\n" + ) + fdef = next(s for s in prog.body if isinstance(s, FuncDef)) + assert fdef.params == ["s", "l", "m"] + + +def test_udf_param_series_and_const_qualifiers(): + prog = _parse( + "//@version=6\nstrategy(\"t\")\n" + "f(series float a, const int b, simple bool c) =>\n a\n" + ) + fdef = next(s for s in prog.body if isinstance(s, FuncDef)) + assert fdef.params == ["a", "b", "c"] + + +def test_postfix_array_decl_var_keyword(): + """`var float[] x = ...` must register a VarDecl (was silently dropped).""" + prog = _parse( + "//@version=6\nstrategy(\"t\")\n" + "var float[] qp = array.from(0.1, 0.2, 0.3)\n" + ) + decl = next((s for s in prog.body if isinstance(s, VarDecl) and s.name == "qp"), None) + assert decl is not None + assert decl.is_var is True + assert "array" in (decl.type_hint or "") + + +def 
test_postfix_array_decl_bare(): + prog = _parse( + "//@version=6\nstrategy(\"t\")\n" + "int[] xs = array.new_int(3, 0)\n" + ) + decl = next((s for s in prog.body if isinstance(s, VarDecl) and s.name == "xs"), None) + assert decl is not None + assert "array" in (decl.type_hint or "") + + +def test_postfix_array_and_simple_qualifier_transpile(): + """End-to-end: both fixed forms transpile to C++ without error.""" + from pineforge_codegen import transpile + cpp = transpile( + "//@version=6\nstrategy(\"t\")\n" + "ma(float s, int l, simple string m) =>\n ta.ema(s, l)\n" + "var float[] qp = array.from(0.1, 0.2, 0.3)\n" + "if close > 0\n plot(ma(qp.get(0), 10, \"EMA\"))\n" + ) + assert "ma_cs0(double source" not in cpp # no spurious 'simple' param split + assert len(cpp.splitlines()) > 10