From f31e3c296f03bf2a640992a3ae71770c710296d2 Mon Sep 17 00:00:00 2001
From: luisleo526
Date: Wed, 1 Jul 2026 01:21:03 +0800
Subject: [PATCH] test(verify): consolidate TradingView trade-list fragments before pairing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TradingView's "List of Trades" (and the engine mirroring it) splits one
entry FILL into multiple "Trade #" rows: a tiny qty_step rounding
remainder sharing the SAME entry time+price, or FIFO partial-close lots
of a grid bot. The greedy entry-time matcher in verify_corpus then
cross-pairs same-entry lots, producing spurious count +
exit-price-p90 deltas (the ~90% qty-p90 is the fingerprint) even though
the engine's fills are trade-for-trade price-exact.

Add consolidate_fragments(): group rows by an EXACT (entry_time,
entry_price, direction) key and merge each group into one logical trade
(sum qty/pnl, keep the shared entry, represent the exit by the shared
price or qty-weighted final close). Applied symmetrically to the TV and
engine lists before alignment (mirrored in regen_validation_report.py).
Because the key is compared exactly, two rows merge only when they are
the same fill event — a distinct trade lands on a different bar or price
level and keeps its own key, so real divergences are never masked.

Corpus byte-identical: verify_corpus.py --all stays excellent=251 /
anomaly=1 / fail=0 with ZERO tier changes (4 corpus strategies that
legitimately fragment consolidate symmetrically and stay excellent — the
key is correct, not inert).

Scraped targets: tomukasss weak->excellent (count 31%->0%, qty-p90
51%->0.2%); xlm/xau grids' fragment artifact cleared (qty-p90
~90%->~0.5%, count->~0) while their REAL residual exit-p90 ~6% (TV holds
lots days longer) correctly remains.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/regen_validation_report.py |   4 ++
 scripts/verify_corpus.py           | 103 +++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/scripts/regen_validation_report.py b/scripts/regen_validation_report.py
index d31bdab..ce41019 100755
--- a/scripts/regen_validation_report.py
+++ b/scripts/regen_validation_report.py
@@ -86,6 +86,10 @@ def _verify_probe(strategy_dir: Path) -> dict:
 
     tv = vc.parse_trades(tv_path, tz=vc.tv_tzinfo(meta))
     eng = vc.parse_trades(eng_path, tz=vc.timezone.utc)
+    # Keep the report in lock-step with verify_one: consolidate fragment rows
+    # (qty_step rounding / FIFO partial-close lots) symmetrically before pairing.
+    tv = vc.consolidate_fragments(tv)
+    eng = vc.consolidate_fragments(eng)
     matched = vc.align_by_time(tv, eng)
     tv_cmp, eng_cmp = vc.trim_to_common_match_window(tv, eng, matched)
     matched = vc.align_by_time(tv_cmp, eng_cmp)
diff --git a/scripts/verify_corpus.py b/scripts/verify_corpus.py
index 4f6257a..4727a31 100755
--- a/scripts/verify_corpus.py
+++ b/scripts/verify_corpus.py
@@ -363,6 +363,103 @@ def parse_trades(csv_path: Path, *, tz) -> list[TradePair]:
     return pairs
 
 
+def consolidate_fragments(pairs: list[TradePair]) -> list[TradePair]:
+    """Reunite the fragment rows that split a single logical fill into one trade.
+
+    TradingView's "List of Trades" (and the engine, mirroring it) splits one
+    entry FILL across several ``Trade #`` rows whenever that position is closed
+    in lots — either a tiny ``qty_step`` rounding remainder that shares the
+    SAME entry time AND price, or FIFO partial-close fragments of a grid bot
+    where one entry is drained by several exit orders. Every such fragment is a
+    *different exit lot of the same entry*, so the entry side is identical
+    across the group: same bar timestamp, same fill price, same direction.
+
+    Left raw, these fragments break the entry-time pairing in
+    :func:`align_by_time`: two fragments share one entry instant, so the greedy
+    matcher cross-pairs a TV lot with the wrong engine lot and reports spurious
+    count + exit-price deltas (the tell-tale ~90% qty-p90 is the fingerprint).
+    This helper merges each fill back into one trade and is applied
+    SYMMETRICALLY to the TV and engine lists, so a genuinely fragmented
+    strategy still pairs 1:1.
+
+    Merge key = ``(entry_time, entry_price, direction)`` compared EXACTLY: two
+    rows merge iff they share the same bar, the same fill price (read from the
+    identical CSV cell, hence bit-identical within one file) and the same side
+    — i.e. they are the same fill event. Two *distinct* trades can never
+    collide, because a second independent entry must occur on a different bar
+    or at a different fill price (a different grid level), either of which
+    changes the key. For an un-fragmented strategy every group has size 1, so
+    this is a strict no-op and the reference corpus is left byte-identical.
+
+    The merged trade keeps the shared entry (time + price) and direction, sums
+    the per-lot qty / pnl / excursions, and represents the exit by the lots'
+    qty-weighted-average price at the final close time — the way TradingView
+    aggregates a multi-lot deal. When every fragment shares one exit (pure
+    qty_step rounding) that average IS the shared exit price, kept exactly so
+    the comparison stays bit-for-bit unchanged.
+
+    >>> mk = lambda n, et, ep, xt, xp, q, p: TradePair("long", et, ep, xt, xp, q, p, n)
+    >>> # two qty_step rounding fragments of one fill: same entry AND same exit
+    >>> a = mk(1, 100, 10.0, 200, 12.0, 0.01, 0.02)
+    >>> b = mk(2, 100, 10.0, 200, 12.0, 0.99, 1.98)
+    >>> # a distinct later trade (different entry bar + price) must NOT merge
+    >>> c = mk(3, 300, 11.0, 400, 13.0, 1.00, 2.00)
+    >>> out = consolidate_fragments([a, b, c])
+    >>> [(round(t.qty, 4), round(t.pnl, 4), t.exit_price) for t in out]
+    [(1.0, 2.0, 12.0), (1.0, 2.0, 13.0)]
+    >>> # FIFO grid: ONE entry drained by two DIFFERENT exit lots -> one deal,
+    >>> # exit = qty-weighted average price at the final close time
+    >>> d = mk(4, 100, 10.0, 150, 12.0, 0.5, 1.0)
+    >>> e = mk(5, 100, 10.0, 250, 14.0, 0.5, 2.0)
+    >>> g = consolidate_fragments([d, e])
+    >>> len(g), g[0].qty, g[0].exit_price, g[0].exit_time
+    (1, 1.0, 13.0, 250)
+    """
+    groups: dict[tuple[int, float, str], list[TradePair]] = {}
+    order: list[tuple[int, float, str]] = []
+    for t in pairs:
+        key = (t.entry_time, t.entry_price, t.direction)
+        if key not in groups:
+            groups[key] = []
+            order.append(key)
+        groups[key].append(t)
+
+    out: list[TradePair] = []
+    for key in order:
+        members = groups[key]
+        if len(members) == 1:
+            out.append(members[0])
+            continue
+        qty = sum(m.qty for m in members)
+        denom = qty if qty else 1.0
+        rep = members[0]
+        # Settle at the LAST lot's close in both branches: lots can share an
+        # exit price yet close on different bars (no-op if they exit together).
+        exit_time = max(m.exit_time for m in members)
+        if len({m.exit_price for m in members}) == 1:
+            # Shared exit price (pure qty_step rounding): keep it verbatim so
+            # the merge is bit-for-bit identical to a single fill.
+            exit_price = rep.exit_price
+        else:
+            # FIFO partial-close lots: qty-weighted average, like a TV deal.
+            exit_price = sum(m.exit_price * m.qty for m in members) / denom
+        out.append(TradePair(
+            direction=rep.direction,
+            entry_time=rep.entry_time,
+            entry_price=rep.entry_price,
+            exit_time=exit_time,
+            exit_price=exit_price,
+            qty=qty,
+            pnl=sum(m.pnl for m in members),
+            trade_num=min(m.trade_num for m in members),
+            pnl_pct=sum(m.pnl_pct * m.qty for m in members) / denom,
+            mfe=sum(m.mfe for m in members),
+            mae=sum(m.mae for m in members),
+        ))
+    out.sort(key=lambda t: t.entry_time)
+    return out
+
+
 def load_strategy_metadata(strategy_dir: Path) -> dict:
     inputs_path = strategy_dir / "inputs.json"
     if not inputs_path.exists():
@@ -458,6 +555,12 @@ def verify_one(strategy_dir: Path, *, verbose: bool = True, show_diffs: int = 0)
 
     tv = parse_trades(tv_path, tz=tv_tzinfo(meta))
     eng = parse_trades(eng_path, tz=timezone.utc)
+    # Reunite TradingView/engine fragment rows (qty_step rounding remainders or
+    # FIFO partial-close lots of one fill) into a single logical trade BEFORE
+    # pairing, symmetrically on both sides, so the entry-time matcher does not
+    # cross-pair same-entry lots. No-op for un-fragmented strategies.
+    tv = consolidate_fragments(tv)
+    eng = consolidate_fragments(eng)
     matched = align_by_time(tv, eng)
     tv_cmp, eng_cmp = trim_to_common_match_window(tv, eng, matched)
     matched = align_by_time(tv_cmp, eng_cmp)