From 2ba0c00d62cdbdf9a0b0c9cc7d03e1ed6692b1f6 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 11:48:49 +0800 Subject: [PATCH 1/6] {"schema":"decodex/commit/1","summary":"Add Radar publisher workflow contracts","authority":"manual"} --- README.md | 8 + artifacts/github/README.md | 1 + artifacts/github/impact/.gitkeep | 1 + artifacts/social/README.md | 9 + artifacts/social/x/.gitkeep | 1 + dev/skills/github-signal/SKILL.md | 12 +- docs/decisions/index.md | 3 + .../radar-control-plane-publisher.md | 36 +++ docs/index.md | 8 + docs/reference/workspace-layout.md | 7 + docs/runbook/index.md | 2 + docs/runbook/local-github-signal-workflow.md | 17 +- docs/runbook/social-publishing-workflow.md | 120 ++++++++++ docs/spec/index.md | 5 + docs/spec/social-post-draft.md | 100 +++++++++ docs/spec/upstream-impact.md | 110 +++++++++ scripts/github/README.md | 8 + scripts/github/social_post_draft.schema.json | 211 ++++++++++++++++++ scripts/github/upstream_impact.schema.json | 114 ++++++++++ 19 files changed, 768 insertions(+), 5 deletions(-) create mode 100644 artifacts/github/impact/.gitkeep create mode 100644 artifacts/social/README.md create mode 100644 artifacts/social/x/.gitkeep create mode 100644 docs/decisions/radar-control-plane-publisher.md create mode 100644 docs/runbook/social-publishing-workflow.md create mode 100644 docs/spec/social-post-draft.md create mode 100644 docs/spec/upstream-impact.md create mode 100644 scripts/github/social_post_draft.schema.json create mode 100644 scripts/github/upstream_impact.schema.json diff --git a/README.md b/README.md index 0af7c4d6..8355bf95 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Repo-native agent orchestration and public Codex signal publishing. - Static Astro site that publishes GitHub-backed Codex change signals. - Deterministic GitHub signal pipeline for change bundles, release deltas, rendered signal entries, and content validation. 
+- Publisher workflow for checked-in upstream impact classification and reviewable X + drafts for `@decodexspace`. - Installable Decodex plugin with reusable agent-facing skills for manual CLI, automation, commit, land, and labels. - Repository documentation split by question type into spec, runbook, reference, and @@ -55,6 +57,7 @@ runtime. - `scripts/github/` owns deterministic GitHub bundle, release-delta, render, and validation scripts. - `artifacts/github/` owns checked-in GitHub bundles and editorial analysis drafts. +- `artifacts/social/` owns checked-in Publisher social draft artifacts. - `plugins/decodex/` owns the installable Decodex plugin and reusable agent-facing skills. - `dev/skills/` owns repository-development skill-like instructions that are not @@ -155,9 +158,14 @@ The GitHub-first public signal path stays deterministic and reviewable: instructions. It is not part of the installable Decodex plugin distribution. - `scripts/github/sync_latest_signals.py` discovers recent merged PRs and refreshes content artifacts. +- `docs/spec/upstream-impact.md` records how upstream Codex changes are classified for + public signals and Control Plane follow-up work. - `scripts/github/render_signal_entry.py` renders reviewed analysis drafts into site content. - `scripts/github/validate_signal_entry.py` validates the published signal collection. +- `docs/spec/social-post-draft.md` and + `docs/runbook/social-publishing-workflow.md` govern optional checked-in X drafts + before external publication. - `.github/workflows/refresh-github-signals.yml` refreshes GitHub-backed signals every hour from a trusted runner. - `.github/workflows/deploy-pages.yml` publishes the Astro site to GitHub Pages on diff --git a/artifacts/github/README.md b/artifacts/github/README.md index aea86e50..877c1db7 100644 --- a/artifacts/github/README.md +++ b/artifacts/github/README.md @@ -4,6 +4,7 @@ This directory stores checked-in GitHub signal pipeline artifacts. 
- `bundles/` holds normalized `github_change_bundle/v1` inputs. - `analysis/` holds reviewed Codex editorial analysis drafts. +- `impact/` holds optional `upstream_impact/v1` classifications. Executable automation for these artifacts lives under `scripts/github/`. Repo-local editorial instructions live under `dev/skills/github-signal/`. diff --git a/artifacts/github/impact/.gitkeep b/artifacts/github/impact/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/artifacts/github/impact/.gitkeep @@ -0,0 +1 @@ + diff --git a/artifacts/social/README.md b/artifacts/social/README.md new file mode 100644 index 00000000..01a96e10 --- /dev/null +++ b/artifacts/social/README.md @@ -0,0 +1,9 @@ +# Social Artifacts + +This directory stores checked-in Publisher artifacts for external social channels. + +- `x/` holds `social_post_draft/v1` drafts for X/Twitter publication. + +Drafts are review artifacts. A draft is not approved for external publication until its +`status` is `approved` under the rules in `docs/spec/social-post-draft.md` and +`docs/runbook/social-publishing-workflow.md`. diff --git a/artifacts/social/x/.gitkeep b/artifacts/social/x/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/artifacts/social/x/.gitkeep @@ -0,0 +1 @@ + diff --git a/dev/skills/github-signal/SKILL.md b/dev/skills/github-signal/SKILL.md index cd8d29ec..e31ac5e1 100644 --- a/dev/skills/github-signal/SKILL.md +++ b/dev/skills/github-signal/SKILL.md @@ -17,13 +17,16 @@ repo already renders into a final `signal_entry/v1`. 
## Read before drafting - `docs/spec/github-change-bundle.md` +- `docs/spec/upstream-impact.md` - `docs/spec/signal-entry.md` +- `docs/spec/social-post-draft.md` - `docs/runbook/local-github-signal-workflow.md` ## Inputs - A normalized bundle JSON under `artifacts/github/bundles/` - An output path under `artifacts/github/analysis/` +- Optional upstream impact output under `artifacts/github/impact/` ## Boundaries @@ -31,6 +34,9 @@ repo already renders into a final `signal_entry/v1`. - Treat commits, files, and patch excerpts as evidence. - Do not summarize every commit as if it were independently important. - Publish only when the change introduces a capability, changes user-visible behavior, or offers a clear try-now path. +- Classify Control Plane impact separately from public signal worthiness when the + change touches Codex app-server, plugins, browser automation, MCP, permissions, + sandboxing, or config behavior. - Keep `why_it_matters` focused on user value, not internal mechanics. - If `how_to_try` is present, make it concrete and pair it with `expected_effect`. - When a feature is gated by `config.toml`, prefer canonical user-facing toggles over raw patch constants or PR-local token strings. @@ -123,8 +129,10 @@ Write a JSON analysis draft with these fields: 2. Read `primary_pr.title`, `primary_pr.body`, `files`, and `commits`. 3. Decide whether the change is signal-worthy. 4. Draft the editorial JSON under `artifacts/github/analysis/`. -5. Render the final signal entry with the repo script. -6. Validate the published signal collection and site build. +5. Draft an `upstream_impact/v1` artifact when the change affects Control Plane or + Publisher follow-up. +6. Render the final signal entry with the repo script. +7. Validate the published signal collection and site build. 
## Commands diff --git a/docs/decisions/index.md b/docs/decisions/index.md index d7fc2723..cf34ac19 100644 --- a/docs/decisions/index.md +++ b/docs/decisions/index.md @@ -23,6 +23,9 @@ Question this index answers: "why was it designed this way?" - [`decodex-plugin-source.md`](./decodex-plugin-source.md) records why this repository owns the canonical Decodex plugin and why generic Playbook guidance should only keep portable routing. +- [`radar-control-plane-publisher.md`](./radar-control-plane-publisher.md) records the + stable capability names for upstream Codex intelligence, retained-lane orchestration, + and public publishing after the repository integration. - [`static-public-site.md`](./static-public-site.md) records why the public Decodex site remains static while runtime/operator behavior stays in the CLI and local control plane. diff --git a/docs/decisions/radar-control-plane-publisher.md b/docs/decisions/radar-control-plane-publisher.md new file mode 100644 index 00000000..2cbc8016 --- /dev/null +++ b/docs/decisions/radar-control-plane-publisher.md @@ -0,0 +1,36 @@ +# Radar, Control Plane, and Publisher + +Status: accepted + +Date: 2026-05-13 + +Question: How should the integrated Decodex repository describe the two merged +capability sets and the new public publishing workflow? + +Decision: Treat Decodex as one product with three named capability areas: + +- **Radar**: upstream Codex change intelligence. Radar owns GitHub bundle collection, + release-delta evidence, code-aware editorial analysis, and upstream impact triage. +- **Control Plane**: repo-native retained agent orchestration. Control Plane owns + registered projects, app-server integration, tracker writes, local runtime state, + operator status, review handoff, landing, closeout, and cleanup. +- **Publisher**: public static-site and social publishing surfaces. 
Publisher consumes + Radar outputs and produces checked-in signal entries, release-delta content, and + reviewable social post drafts for external publication. + +The temporary A/B repository labels are discussion aids only. Use the capability names +above in new documentation, issue text, schema names, and operator-facing language. + +Consequences: + +- Radar can improve Control Plane without coupling the public site to the runtime. + Upstream Codex changes that touch app-server, plugins, browser automation, MCP, + permission profiles, config, or sandbox behavior should be classified for Control + Plane impact before they become engineering work. +- Publisher remains static-first. Public pages and social drafts are generated from + checked-in artifacts and reviewed content, not from a live Decodex daemon. +- `@decodexspace` content should not duplicate a release bot. Publisher should turn + Radar evidence into practical, evidence-backed user and operator angles. +- Control Plane remains the local execution authority. Publisher content may describe + Decodex implications, but it must not claim shipped runtime behavior unless the + relevant code, docs, or release evidence exists. diff --git a/docs/index.md b/docs/index.md index fc86a977..0f4f3536 100644 --- a/docs/index.md +++ b/docs/index.md @@ -38,11 +38,17 @@ The split below is by question type, not by human-versus-agent audience. 
implementation surface maps -> `docs/reference/` - Need durable design rationale, packaging choices, or static-site tradeoffs -> `docs/decisions/` +- Need the current Radar, Control Plane, and Publisher capability boundary -> + `docs/decisions/radar-control-plane-publisher.md` - Need the raw machine-authored research run artifacts used by shipped research tooling -> `docs/research/` - Need reusable agent-facing Decodex usage instructions -> `plugins/decodex/` - Need the repo-local GitHub signal editorial workflow -> `dev/skills/github-signal/` plus `docs/runbook/local-github-signal-workflow.md` +- Need upstream Codex impact classification or social post draft contracts -> + `docs/spec/upstream-impact.md` and `docs/spec/social-post-draft.md` +- Need the `@decodexspace` social publishing procedure -> + `docs/runbook/social-publishing-workflow.md` - Need repository execution defaults or tracker-state policy -> registered project `WORKFLOW.md` - Need repo task names or automation entrypoints -> `Makefile.toml` @@ -60,6 +66,8 @@ The split below is by question type, not by human-versus-agent audience. - Keep the public site static by default. `site/` consumes checked-in content and generated JSON; it must not depend on a live Decodex daemon unless a later decision changes that boundary. +- Keep social publishing static-first as well. Drafts must be reviewable checked-in + artifacts before any external posting automation acts on them. - Start each document with a short routing header that says what the document is for, when to read it, and what it does not cover. - Keep links explicit and stable. diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index 2c466020..c603f42c 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -21,6 +21,7 @@ should not be treated as repository source. 
| `scripts/github/` | Deterministic GitHub collection, normalization, render, validation, and sync scripts for public signal content. | | `scripts/config/` | Repository automation scripts for config-derived artifacts. | | `artifacts/github/` | Checked-in GitHub change bundles and editorial analysis drafts used by the public signal pipeline. | +| `artifacts/social/` | Checked-in Publisher social post drafts and publication evidence. | | `dev/skills/` | Repository-development skill-like instructions that are not part of installable plugin distribution. | | `plugins/decodex/` | Canonical installable Decodex plugin source and reusable agent-facing skills, including manual CLI, automation, commit, land, and labels. | | `docs/spec/` | Normative runtime, workflow, site, and content contracts. | @@ -79,6 +80,12 @@ editorial drafting step through the repo-local instructions at plugin distribution. Generated GitHub bundles and analysis drafts live under `artifacts/github/` and must stay explicit and checked into the repository. +`artifacts/github/impact/` may hold `upstream_impact/v1` classifications when an +upstream Codex change has public-signal, Control Plane, or Publisher implications. +`artifacts/social/` may hold `social_post_draft/v1` drafts before external publication. +Both remain checked-in review artifacts; neither turns the public site into a live +service. + ## Installable Codex surface The installable Codex home surface, including `~/.codex/AGENTS.md`, is not a Decodex diff --git a/docs/runbook/index.md b/docs/runbook/index.md index 0578c57c..fec3c2f0 100644 --- a/docs/runbook/index.md +++ b/docs/runbook/index.md @@ -33,6 +33,8 @@ Question this index answers: "which sequence should I execute?" - [`local-github-signal-workflow.md`](./local-github-signal-workflow.md) for collecting GitHub change bundles, running Codex editorial analysis, validating signal entries, and publishing static site content. 
+- [`social-publishing-workflow.md`](./social-publishing-workflow.md) for turning Radar + evidence into reviewed `@decodexspace` X drafts and recording publication evidence. - [`recover-review-handoff.md`](./recover-review-handoff.md) for diagnosing and explicitly rebinding retained review lanes blocked by a missing runtime DB handoff marker. diff --git a/docs/runbook/local-github-signal-workflow.md b/docs/runbook/local-github-signal-workflow.md index dd6b5fdc..dabd9281 100644 --- a/docs/runbook/local-github-signal-workflow.md +++ b/docs/runbook/local-github-signal-workflow.md @@ -21,6 +21,8 @@ Depends on: Outputs: - A validated signal entry committed to the repo +- Optional upstream-impact and social-draft artifacts when the change affects Control + Plane or external publishing - A push that allows CI to build and deploy the static site ## Workflow @@ -30,9 +32,12 @@ Outputs: 3. Run Codex analysis against the bundle with the repo-local instructions at `dev/skills/github-signal/` and save the editorial draft JSON under `artifacts/github/analysis/`. 4. Render the resulting signal entry into `site/src/content/signals/`. 5. Validate the signal entry shape and collection consistency. -6. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. -7. Review the rendered content manually in the homepage feed. -8. Push the content update and let CI build and deploy the static site. +6. Classify upstream impact when the change may affect Control Plane or Publisher. +7. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. +8. Draft optional social publishing content only through + [`social-publishing-workflow.md`](./social-publishing-workflow.md). +9. Review the rendered content manually in the homepage feed. +10. Push the content update and let CI build and deploy the static site. 
## Deterministic commands @@ -117,6 +122,12 @@ For the release-delta artifact: - prefer highlighting the smaller tracked subset over trying to summarize every internal commit in the compare - do not treat prerelease notes alone as sufficient editorial evidence when the release body is empty +For upstream-impact and social-draft artifacts: + +- classify Control Plane implications before creating engineering follow-up work +- keep social drafts checked in and unposted until approval +- do not use X engagement as technical evidence + ## CI boundary The current Decodex boundary is: diff --git a/docs/runbook/social-publishing-workflow.md b/docs/runbook/social-publishing-workflow.md new file mode 100644 index 00000000..5a15d216 --- /dev/null +++ b/docs/runbook/social-publishing-workflow.md @@ -0,0 +1,120 @@ +# Social Publishing Workflow + +Goal: Turn Radar evidence into reviewable `@decodexspace` social drafts without making +the public site or X account depend on a live Decodex daemon. + +Read this when: +- You are preparing X posts about Codex releases, PRs, app updates, or usage patterns. +- You need to decide whether a Decodex signal should also produce a social draft. +- You are reviewing a `social_post_draft/v1` before external publication. + +Inputs: +- Source evidence from GitHub, OpenAI developer changelogs, checked-in signal entries, + release-delta artifacts, or verified browser observations. +- The governing schemas: + - [`../spec/upstream-impact.md`](../spec/upstream-impact.md) + - [`../spec/social-post-draft.md`](../spec/social-post-draft.md) + - [`../spec/signal-entry.md`](../spec/signal-entry.md) + +Depends on: +- [`local-github-signal-workflow.md`](./local-github-signal-workflow.md) for the + GitHub signal path. +- [`../decisions/radar-control-plane-publisher.md`](../decisions/radar-control-plane-publisher.md) + for the Radar, Control Plane, and Publisher boundary. 
+- [`../decisions/static-public-site.md`](../decisions/static-public-site.md) for the + static-first public surface decision. + +Outputs: +- An optional `upstream_impact/v1` artifact under `artifacts/github/impact/`. +- An optional `social_post_draft/v1` artifact under `artifacts/social/x/`. +- A published X URL only after explicit approval. + +## Style Benchmarks + +These benchmark observations are for tone and format only. They are not source evidence +for technical claims. + +| Account | Useful pattern | Decodex stance | +| --- | --- | --- | +| `@Codex_Changelog` | Fast release-aware bullets with a changelog link. | Useful for `release_pulse`, but Decodex should not become a duplicate release bot. | +| `@LLMJunky` | Practical user interpretation: how a feature changes real workflows, what is worth trying, and where limits remain. | Prefer this style when Radar evidence can support the claim quickly. | +| `@decodexspace` | Fresh account with no post history yet. | Establish a voice around evidence-backed Codex intelligence and Decodex operator impact. | + +## Workflow + +1. Start from source evidence. + - Prefer a merged PR bundle, release note, OpenAI developer changelog entry, or + already-rendered `signal_entry/v1`. + - Do not start from social engagement alone. + +2. Classify upstream impact. + - Write or update `artifacts/github/impact/<slug>.json` when the change may affect + Control Plane or Publisher. + - Use `public_signal_decision`, `control_plane_impact`, and `publisher_angle` from + [`../spec/upstream-impact.md`](../spec/upstream-impact.md). + +3. Decide whether to draft a post. + - Draft when the change has a clear `release_pulse`, `practical_explainer`, + `operator_impact`, or `watch_note` angle. + - Skip when the change is internal cleanup, too weakly sourced, too private, or too + vague for a useful reader takeaway. + +4. Create a checked-in draft. + - Write `artifacts/social/x/<slug>.json`. + - Use `schema = "social_post_draft/v1"`. 
+ - Keep `status = "draft"` until approval. + - Keep `text[]` short enough for X, one item per post in a thread. + +5. Review the claims. + - Every user-facing claim must map to source evidence. + - Confirm the post does not imply shipped Decodex behavior without Control Plane + evidence. + - Confirm beta, rollout, platform, and config caveats are explicit. + +6. Approve or reject. + - Move `status` to `approved` only after human or explicitly routed approval. + - Keep rejected drafts as `status = "rejected"` when the rejection explains a useful + future boundary. + +7. Publish externally. + - Do not post from automation unless the draft is already `approved`. + - After posting, update the artifact to `status = "published"` and set + `published_url`. + +## Mode Guidance + +Use `release_pulse` when: + +- the release note itself is the story +- the post is mainly fast awareness +- the change does not yet justify a deeper Decodex angle + +Use `practical_explainer` when: + +- a reader can try the change in one short session +- the expected result is observable +- the value is easier to understand through workflow language than release bullets + +Use `operator_impact` when: + +- the change touches app-server, plugins, browser automation, MCP, permissions, + sandboxing, config, or runtime orchestration +- Decodex Control Plane may need to adopt, watch, or guard against the change +- the public explanation can stay honest about what Decodex has and has not shipped + +Use `watch_note` when: + +- the change is interesting but evidence is incomplete +- rollout or platform status is unclear +- a strong recommendation would overclaim + +## Guardrails + +- Do not send credentials, private issue details, or local runtime paths to X. +- Do not publish unapproved drafts. +- Do not use `@Chrome` or any browser automation to post externally without explicit + user approval for that specific post. 
+- Do not let social drafting bypass the static site, signal-entry, or upstream-impact + evidence chain. +- Do not quote third-party posts at length. Record style observations, not copied + content. diff --git a/docs/spec/index.md b/docs/spec/index.md index 01789bfd..3a313f98 100644 --- a/docs/spec/index.md +++ b/docs/spec/index.md @@ -64,6 +64,11 @@ Then keep the body explicit: by the static site. - [`release-delta.md`](./release-delta.md) defines the stable-versus-prerelease summary artifact used by the homepage release-delta module. +- [`upstream-impact.md`](./upstream-impact.md) defines how Radar classifies upstream + Codex changes for public signals, Control Plane follow-up, and Publisher angles. +- [`social-post-draft.md`](./social-post-draft.md) defines the checked-in social draft + artifact required before `@decodexspace` or another external social account publishes + Decodex content. - [`site-contract.md`](./site-contract.md) defines the static-site page budget, homepage obligations, and card rendering contract. - [`reset-status.md`](./reset-status.md) defines the reset-status artifact consumed by diff --git a/docs/spec/social-post-draft.md b/docs/spec/social-post-draft.md new file mode 100644 index 00000000..ba52391d --- /dev/null +++ b/docs/spec/social-post-draft.md @@ -0,0 +1,100 @@ +# Social Post Draft + +Purpose: Define the checked-in draft artifact used before Decodex publishes from the +`@decodexspace` X account or another external social channel. + +Status: normative + +Read this when: +- You are generating, reviewing, or validating social publishing drafts. +- You need to decide what evidence a post must carry before external publication. +- You are extending Publisher beyond static site signal entries. + +Not this document: +- The upstream GitHub bundle schema. Read [`github-change-bundle.md`](./github-change-bundle.md). +- The public site signal-entry schema. Read [`signal-entry.md`](./signal-entry.md). +- The social publishing procedure. 
Read + [`../runbook/social-publishing-workflow.md`](../runbook/social-publishing-workflow.md). + +Defines: +- The `social_post_draft/v1` artifact shape. +- Allowed post modes for Decodex Publisher. +- Review and publication state rules. + +## Artifact identity + +The canonical schema identifier is: + +- `social_post_draft/v1` + +Recommended checked-in location: + +- `artifacts/social/x/<slug>.json` + +## Required fields + +| Field | Type | Notes | +| --- | --- | --- | +| `schema` | string | Must be `social_post_draft/v1`. | +| `slug` | string | Stable URL-safe identifier for the draft. | +| `channel` | string | Must be `x` for X/Twitter drafts. | +| `target_account` | string | Account handle without URL, such as `decodexspace`. | +| `mode` | string | One value from the post-mode table. | +| `status` | string | `draft`, `approved`, `published`, or `rejected`. | +| `audience` | string | Primary reader group. | +| `text` | array | One or more post bodies, one array item per thread post. | +| `source_refs` | object | Links to signal, upstream-impact, release, PR, or changelog evidence. | +| `evidence_notes` | array | Non-empty list of evidence-backed notes that justify the post. | +| `claims` | array | Non-empty list of user-facing claims with evidence references. | + +Optional fields: + +- `published_url`: required when `status = "published"`. +- `approval`: reviewer, timestamp, and notes when `status = "approved"` or + `status = "published"`; optional rejection notes when `status = "rejected"`. +- `caveats`: rollout limits, uncertainty, platform limits, or version gates. +- `media_refs`: checked-in screenshots, videos, or generated assets used by the post. + +## Post modes + +Use exactly one `mode` value: + +| Value | Purpose | +| --- | --- | +| `release_pulse` | Short release-aware summary with a source link. | +| `practical_explainer` | Concrete user-facing explanation of how to try or reason about a feature. 
| +| `operator_impact` | Decodex-specific explanation of app-server, plugin, browser, MCP, sandbox, config, or orchestration implications. | +| `thread` | Multi-post explanation when one post would hide important evidence or caveats. | +| `watch_note` | Cautious note for interesting changes that are not ready for a strong recommendation. | + +`release_pulse` should be the minority path for `@decodexspace`; the account should +differ from release-only bots by preferring `practical_explainer` and `operator_impact` +drafts when evidence supports them. + +## Claim rules + +Each `claims[]` entry must include: + +- `text`: the claim visible or implied in the post. +- `evidence`: source reference key, URL, file path, or artifact path. +- `confidence`: `confirmed`, `likely`, or `weak`. + +Rules: + +- Do not publish a claim without evidence. +- Do not imply Decodex runtime support unless Control Plane evidence exists. +- Do not present a beta, hidden, or rollout-gated capability as generally available. +- Do not use a social post to replace the site signal or upstream-impact artifact. +- Do not quote third-party posts at length. Summarize style or public reaction unless + the quoted text is short and necessary. + +## Status rules + +- `draft`: generated or edited, not approved for external publication. +- `approved`: reviewed by a human or an explicitly routed approval process. +- `published`: externally posted; `published_url` is required. +- `rejected`: intentionally not publishable; keep rejection notes in `approval.notes` + or `caveats`. + +No automation may post a `draft` directly to X. External publication requires +`status = "approved"` immediately before the posting action. 
diff --git a/docs/spec/upstream-impact.md b/docs/spec/upstream-impact.md new file mode 100644 index 00000000..65954ed4 --- /dev/null +++ b/docs/spec/upstream-impact.md @@ -0,0 +1,110 @@ +# Upstream Impact + +Purpose: Define how Decodex classifies upstream Codex changes before they become public +signals, Control Plane follow-up work, or social publishing drafts. + +Status: normative + +Read this when: +- You are analyzing an OpenAI Codex PR, commit, release note, or developer changelog. +- You need to decide whether a Radar finding should create public content, Control + Plane work, both, or neither. +- You are designing or validating an upstream-impact artifact. + +Not this document: +- The GitHub input bundle schema. Read [`github-change-bundle.md`](./github-change-bundle.md). +- The published site signal schema. Read [`signal-entry.md`](./signal-entry.md). +- The social post draft schema. Read [`social-post-draft.md`](./social-post-draft.md). +- The operator procedure for publishing. Read + [`../runbook/social-publishing-workflow.md`](../runbook/social-publishing-workflow.md). + +Defines: +- The `upstream_impact/v1` classification shape. +- The Control Plane impact ladder. +- The Publisher angle ladder. +- Evidence and confidence rules for turning upstream changes into follow-up work. + +## Artifact identity + +The canonical schema identifier is: + +- `upstream_impact/v1` + +Recommended checked-in location: + +- `artifacts/github/impact/<slug>.json` + +## Required fields + +| Field | Type | Notes | +| --- | --- | --- | +| `schema` | string | Must be `upstream_impact/v1`. | +| `slug` | string | Stable URL-safe identifier, usually matching the source bundle stem. | +| `repo` | string | Upstream repository, such as `openai/codex`. | +| `source_refs` | object | PR, commit, release, changelog, or signal references used as evidence. | +| `observed_change` | string | Short factual description of the upstream change. 
| +| `public_signal_decision` | string | `publish`, `defer`, or `skip`. | +| `control_plane_impact` | string | One value from the Control Plane impact ladder. | +| `publisher_angle` | string | One value from the Publisher angle ladder. | +| `confidence` | string | `confirmed`, `likely`, or `weak`. | +| `evidence` | array | Non-empty list of source-backed evidence notes. | + +Optional fields: + +- `candidate_followups`: bounded engineering or research follow-up suggestions. +- `social_notes`: notes useful to a later `social_post_draft/v1`. +- `caveats`: uncertainty, version gating, platform limits, or rollout limits. + +## Control Plane impact ladder + +Use exactly one `control_plane_impact` value: + +| Value | Meaning | +| --- | --- | +| `none` | No plausible Control Plane implication. | +| `watch` | Worth tracking, but no concrete Decodex runtime or operator action is clear yet. | +| `candidate` | Could improve Control Plane and deserves a bounded issue or research pass. | +| `compat_risk` | May break, narrow, or change assumptions in app-server, plugin, config, permission, sandbox, browser, MCP, or tracker flows. | +| `adopt_now` | Evidence is strong enough to create an implementation issue without more discovery. | + +`compat_risk` takes precedence over `candidate` when both apply. + +## Publisher angle ladder + +Use exactly one `publisher_angle` value: + +| Value | Meaning | +| --- | --- | +| `none` | Do not use the change for external content. | +| `release_pulse` | Short release-aware awareness post. | +| `practical_explainer` | User-facing explanation of how to use or evaluate the change. | +| `operator_impact` | Decodex-specific explanation of what the change means for agent orchestration or app-server workflows. | +| `watch_note` | Cautious public note when the change is interesting but not ready for a strong claim. | + +Prefer `practical_explainer` or `operator_impact` when the evidence supports a concrete +workflow. 
Use `release_pulse` only when the post would otherwise be a factual release +summary. + +## Evidence rules + +- Evidence must come from source material: PR body, commit message, file path, patch + excerpt, release note, developer changelog, checked-in Decodex signal, or verified + browser observation. +- Do not infer shipped user behavior from internal names alone. +- Do not classify a change as `adopt_now` without a concrete Decodex surface that would + change. +- Do not classify a change as `practical_explainer` without a clear user-observable + path. +- Lower confidence when the source is commit-only, release-note-only, or hidden behind + private/beta rollout language. + +## Relationship to other artifacts + +`upstream_impact/v1` is an editorial bridge artifact: + +- It may consume `github_change_bundle/v1`. +- It may support a `signal_entry/v1`. +- It may support a `social_post_draft/v1`. +- It may justify a later Linear issue or implementation brief. + +It does not replace any of those artifacts. 
diff --git a/scripts/github/README.md b/scripts/github/README.md index a55975a7..bf42d77e 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -11,10 +11,18 @@ Current scripts: - `render_signal_entry.py` - `validate_signal_entry.py` +Current schema-only contracts: + +- `analysis_draft.schema.json` +- `upstream_impact.schema.json` +- `social_post_draft.schema.json` + Contract ownership: - input bundle shape: `docs/spec/github-change-bundle.md` - output signal shape: `docs/spec/signal-entry.md` +- upstream impact shape: `docs/spec/upstream-impact.md` +- social post draft shape: `docs/spec/social-post-draft.md` Example flow: diff --git a/scripts/github/social_post_draft.schema.json b/scripts/github/social_post_draft.schema.json new file mode 100644 index 00000000..a04c7c26 --- /dev/null +++ b/scripts/github/social_post_draft.schema.json @@ -0,0 +1,211 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Decodex social post draft", + "type": "object", + "additionalProperties": false, + "required": [ + "schema", + "slug", + "channel", + "target_account", + "mode", + "status", + "audience", + "text", + "source_refs", + "evidence_notes", + "claims" + ], + "properties": { + "schema": { + "const": "social_post_draft/v1" + }, + "slug": { + "type": "string", + "minLength": 1 + }, + "channel": { + "const": "x" + }, + "target_account": { + "type": "string", + "pattern": "^[A-Za-z0-9_]+$", + "minLength": 1 + }, + "mode": { + "type": "string", + "enum": ["release_pulse", "practical_explainer", "operator_impact", "thread", "watch_note"] + }, + "status": { + "type": "string", + "enum": ["draft", "approved", "published", "rejected"] + }, + "audience": { + "type": "string", + "minLength": 1 + }, + "text": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1, + "maxLength": 280 + } + }, + "source_refs": { + "type": "object", + "additionalProperties": false, + "anyOf": [ + { + "required": ["signals"] + }, + 
{ + "required": ["upstream_impacts"] + }, + { + "required": ["urls"] + } + ], + "properties": { + "signals": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1 + } + }, + "upstream_impacts": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1 + } + }, + "urls": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^https://" + } + } + } + }, + "evidence_notes": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1 + } + }, + "claims": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["text", "evidence", "confidence"], + "properties": { + "text": { + "type": "string", + "minLength": 1 + }, + "evidence": { + "type": "string", + "minLength": 1 + }, + "confidence": { + "type": "string", + "enum": ["confirmed", "likely", "weak"] + } + } + } + }, + "published_url": { + "type": "string", + "pattern": "^https://" + }, + "approval": { + "type": "object", + "additionalProperties": false, + "required": ["reviewed_by", "reviewed_at", "notes"], + "properties": { + "reviewed_by": { + "type": "string", + "minLength": 1 + }, + "reviewed_at": { + "type": "string", + "minLength": 1 + }, + "notes": { + "type": "string", + "minLength": 1 + } + } + }, + "caveats": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "media_refs": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + }, + "allOf": [ + { + "if": { + "properties": { + "status": { + "const": "published" + } + } + }, + "then": { + "required": ["published_url", "approval"] + } + }, + { + "if": { + "properties": { + "status": { + "enum": ["approved", "published"] + } + } + }, + "then": { + "required": ["approval"] + } + }, + { + "if": { + "properties": { + "status": { + "const": "rejected" + } + } + }, + "then": { + "anyOf": [ + { + "required": ["approval"] + }, + { + 
"required": ["caveats"] + } + ] + } + } + ] +} diff --git a/scripts/github/upstream_impact.schema.json b/scripts/github/upstream_impact.schema.json new file mode 100644 index 00000000..7f27fdb0 --- /dev/null +++ b/scripts/github/upstream_impact.schema.json @@ -0,0 +1,114 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Decodex upstream impact", + "type": "object", + "additionalProperties": false, + "required": [ + "schema", + "slug", + "repo", + "source_refs", + "observed_change", + "public_signal_decision", + "control_plane_impact", + "publisher_angle", + "confidence", + "evidence" + ], + "properties": { + "schema": { + "const": "upstream_impact/v1" + }, + "slug": { + "type": "string", + "minLength": 1 + }, + "repo": { + "type": "string", + "minLength": 1 + }, + "source_refs": { + "type": "object", + "additionalProperties": false, + "required": ["items"], + "properties": { + "items": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["kind", "title", "url"], + "properties": { + "kind": { + "type": "string", + "enum": ["pull_request", "commit", "release", "changelog", "signal", "browser_observation"] + }, + "title": { + "type": "string", + "minLength": 1 + }, + "url": { + "type": "string", + "pattern": "^https://" + }, + "meta": { + "type": "string", + "minLength": 1 + } + } + } + } + } + }, + "observed_change": { + "type": "string", + "minLength": 1 + }, + "public_signal_decision": { + "type": "string", + "enum": ["publish", "defer", "skip"] + }, + "control_plane_impact": { + "type": "string", + "enum": ["none", "watch", "candidate", "compat_risk", "adopt_now"] + }, + "publisher_angle": { + "type": "string", + "enum": ["none", "release_pulse", "practical_explainer", "operator_impact", "watch_note"] + }, + "confidence": { + "type": "string", + "enum": ["confirmed", "likely", "weak"] + }, + "evidence": { + "type": "array", + "minItems": 1, + "items": { + "type": 
"string", + "minLength": 1 + } + }, + "candidate_followups": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "social_notes": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "caveats": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + } +} From fb6790f38e0078c7c8d480c14004b91ea568c030 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 12:07:39 +0800 Subject: [PATCH 2/6] {"schema":"decodex/commit/1","summary":"Split Codex Radar dev skills","authority":"manual"} --- README.md | 14 +- dev/skills/README.md | 32 +++ dev/skills/codex-code-analysis/SKILL.md | 95 +++++++ dev/skills/codex-release-analysis/SKILL.md | 80 ++++++ dev/skills/codex-upstream-triage/SKILL.md | 93 +++++++ dev/skills/github-signal/SKILL.md | 30 ++- dev/skills/x-post-draft/SKILL.md | 83 ++++++ docs/index.md | 5 +- docs/reference/workspace-layout.md | 12 +- .../2026-05-13_codex-radar-skill-split.json | 243 ++++++++++++++++++ docs/runbook/local-github-signal-workflow.md | 37 ++- docs/runbook/social-publishing-workflow.md | 3 + scripts/github/README.md | 5 + scripts/github/run_codex_analysis.py | 4 + 14 files changed, 703 insertions(+), 33 deletions(-) create mode 100644 dev/skills/README.md create mode 100644 dev/skills/codex-code-analysis/SKILL.md create mode 100644 dev/skills/codex-release-analysis/SKILL.md create mode 100644 dev/skills/codex-upstream-triage/SKILL.md create mode 100644 dev/skills/x-post-draft/SKILL.md create mode 100644 docs/research/2026-05-13_codex-radar-skill-split.json diff --git a/README.md b/README.md index 8355bf95..7449fb0c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Repo-native agent orchestration and public Codex signal publishing. - Static Astro site that publishes GitHub-backed Codex change signals. - Deterministic GitHub signal pipeline for change bundles, release deltas, rendered signal entries, and content validation. 
+- Repo-local Radar skills for upstream Codex triage, code analysis, release analysis, + signal drafting, and X post drafting. - Publisher workflow for checked-in upstream impact classification and reviewable X drafts for `@decodexspace`. - Installable Decodex plugin with reusable agent-facing skills for manual CLI, @@ -60,8 +62,8 @@ runtime. - `artifacts/social/` owns checked-in Publisher social draft artifacts. - `plugins/decodex/` owns the installable Decodex plugin and reusable agent-facing skills. -- `dev/skills/` owns repository-development skill-like instructions that are not - packaged with the installable Decodex plugin. +- `dev/skills/` owns repository-development skills for Radar analysis and Publisher + drafting. They are not packaged with the installable Decodex plugin. - `docs/` remains the authoritative documentation surface. Runtime authority stays in `apps/decodex/src/`, the registered project contracts under @@ -154,8 +156,8 @@ The GitHub-first public signal path stays deterministic and reviewable: - `scripts/github/build_change_bundle.py` builds normalized GitHub bundles under `artifacts/github/bundles/`. -- `dev/skills/github-signal/SKILL.md` records the repo-local Codex editorial - instructions. It is not part of the installable Decodex plugin distribution. +- `dev/skills/README.md` routes the repo-local Radar and editorial instructions. They + are not part of the installable Decodex plugin distribution. - `scripts/github/sync_latest_signals.py` discovers recent merged PRs and refreshes content artifacts. 
- `docs/spec/upstream-impact.md` records how upstream Codex changes are classified for @@ -224,8 +226,8 @@ The tracked workspace currently keeps: validation script surface - `artifacts/github/` as checked-in GitHub bundle and analysis artifacts - `plugins/decodex/` as the canonical installable Decodex plugin source -- `dev/skills/` as repo-development skill-like instructions that are not packaged with - the installable Decodex plugin +- `dev/skills/` as repo-development Radar analysis and Publisher drafting skills that + are not packaged with the installable Decodex plugin - `docs/spec/` as the normative runtime, workflow, site, and content contract lane - `docs/runbook/` as the operator procedures, validation sequences, deployment steps, and content workflow lane diff --git a/dev/skills/README.md b/dev/skills/README.md new file mode 100644 index 00000000..d8d78409 --- /dev/null +++ b/dev/skills/README.md @@ -0,0 +1,32 @@ +# Decodex Dev Skills + +Purpose: Route repo-local development skills for the Decodex Radar and Publisher +pipeline. + +These skills are checked-in repository-development instructions. They are not packaged +with the installable Decodex plugin under `plugins/decodex/`. + +## Skill Map + +Use these skills in order when turning upstream Codex activity into Decodex content or +follow-up work: + +1. `codex-upstream-triage`: choose which upstream commits, PRs, releases, or changelog + entries deserve deeper analysis. +2. `codex-code-analysis`: read the selected upstream code or patch evidence and map it + to user-visible, Control Plane, and Publisher implications. +3. `codex-release-analysis`: evaluate release or changelog material against commits, + PRs, release-delta artifacts, and already-published Decodex signals. +4. `github-signal`: turn the reviewed GitHub bundle and analysis result into the + `analysis_draft` JSON consumed by `scripts/github/render_signal_entry.py`. +5. 
`x-post-draft`: turn evidence-backed Radar output into a reviewable + `social_post_draft/v1` artifact for `@decodexspace`. + +Use only the skills needed for the current artifact. Do not create a social draft just +because a signal exists. + +Only the existing checked-in contracts are durable artifacts today: +`github_change_bundle/v1`, `analysis_draft`, `signal_entry/v1`, `upstream_impact/v1`, +`release_delta/v1`, and `social_post_draft/v1`. The triage, code-analysis, and +release-analysis skills are reasoning passes unless their conclusions are promoted +into one of those contracts. diff --git a/dev/skills/codex-code-analysis/SKILL.md b/dev/skills/codex-code-analysis/SKILL.md new file mode 100644 index 00000000..8514643e --- /dev/null +++ b/dev/skills/codex-code-analysis/SKILL.md @@ -0,0 +1,95 @@ +--- +name: codex-code-analysis +description: Use when reading upstream OpenAI Codex PR, commit, file, or patch evidence to understand what changed, whether it is user-visible, and how it affects Decodex Radar, Control Plane, Publisher, or follow-up engineering. +--- + +# Decodex Codex Code Analysis + +Use this skill after upstream triage chooses a candidate. Its job is to turn source +evidence into a defensible interpretation, not to rewrite release notes. + +This is a Decodex repository-development instruction surface, not an installable +Decodex plugin skill. + +## Read Before Analysis + +- `docs/spec/github-change-bundle.md` +- `docs/spec/upstream-impact.md` +- `docs/spec/signal-entry.md` +- `dev/skills/README.md` + +## Inputs + +- A `github_change_bundle/v1` under `artifacts/github/bundles/`, or enough GitHub PR or + commit evidence to request or create one +- Optional release or changelog context +- Optional existing Decodex signal, upstream-impact, or release-delta artifacts + +This skill does not define a new checked-in artifact. 
Keep the result in-session +unless it is promoted into an existing `analysis_draft`, `upstream_impact/v1`, or +`social_post_draft/v1` contract. + +## Analysis Loop + +1. Identify the changed surface. + - Public API/protocol/schema + - CLI/TUI/app-server behavior + - Config, permission, sandbox, auth, provider, hook, or plugin behavior + - Docs, examples, tests, or internal-only refactor + +2. Follow the runtime path. + - Start from the PR title/body when `analysis_mode = "pr_first"`. + - Use changed files and patch excerpts to locate the actual behavior boundary. + - Use tests and docs as confirmation, not as the only proof. + - Read enough surrounding code to know whether the change is shipped behavior, + plumbing, guardrail, or cleanup. + +3. Map implications. + - User path: what a normal Codex user can observe or try. + - Control Plane path: what Decodex runtime, app-server integration, plugin routing, + tracker tooling, or automation policy may need to adopt or guard. + - Publisher path: what can be explained publicly without overclaiming. + +4. Assign confidence. + - `confirmed`: source patch plus tests, docs, schema, CLI help, or public release + evidence point to the same behavior. + - `likely`: code strongly implies behavior, but no public docs or direct test covers + the exact user path. + - `weak`: evidence is names, commit titles, sparse release notes, or incomplete patch + excerpts. 
+ +## Evidence Standards + +Prefer concrete anchors: + +- changed protocol/schema files +- config-schema or CLI flag changes +- tests that exercise visible behavior +- docs or examples that describe the behavior +- app-server, plugin, MCP, browser, sandbox, hook, auth, provider, or tool-handler code + +Do not treat these as enough by themselves: + +- internal file names +- generic commit titles +- social engagement +- release bodies that only repeat the version number +- TODOs or comments without behavior + +## Output + +Return an analysis note that can feed `github-signal`, `codex-release-analysis`, or +`upstream_impact/v1`: + +- one-sentence observed change +- changed surface classification +- evidence anchors +- user-visible path, if any +- Control Plane impact, if any +- Publisher angle, if any +- confidence and caveats +- recommended next artifact: `none`, `analysis_draft` through `github-signal`, + `upstream_impact/v1`, or `social_post_draft/v1` + +Keep the note shorter than the source patch. Explain the behavior path, not every +changed file. diff --git a/dev/skills/codex-release-analysis/SKILL.md b/dev/skills/codex-release-analysis/SKILL.md new file mode 100644 index 00000000..21cba5af --- /dev/null +++ b/dev/skills/codex-release-analysis/SKILL.md @@ -0,0 +1,80 @@ +--- +name: codex-release-analysis +description: Use when evaluating upstream Codex releases, prereleases, app updates, or changelog entries and deciding what Decodex should publish, watch, or absorb from the release. +--- + +# Decodex Codex Release Analysis + +Use this skill when the source is release-shaped: GitHub releases, prerelease tags, +OpenAI developer changelogs, app update notes, or a release-focused social post. + +This is a Decodex repository-development instruction surface, not an installable +Decodex plugin skill. 
+ +## Read Before Analysis + +- `docs/spec/release-delta.md` +- `docs/spec/signal-entry.md` +- `docs/spec/upstream-impact.md` +- `docs/runbook/local-github-signal-workflow.md` +- `dev/skills/codex-upstream-triage/SKILL.md` +- `dev/skills/codex-code-analysis/SKILL.md` + +## Inputs + +- Release tag, changelog URL, or app update URL +- Existing `release_delta/v1` artifact, when available +- Existing Decodex signals that may explain the release delta +- GitHub compare, commit, or PR evidence for any claim beyond the release headline + +This skill is an advisory reasoning pass. It does not define a new checked-in +release-analysis artifact and does not replace deterministic `release_delta/v1` +generation. + +## Release Reading Rules + +- Treat release notes as discovery, not proof, when they are sparse. +- Use GitHub compare data and PR mappings to explain what changed between stable and + prerelease tags. +- Prefer already-published `signal_entry/v1` items when they match the compare commit + or PR evidence. +- Do not imply a feature is broadly available when the source says alpha, beta, + rollout, platform-gated, or config-gated. +- Do not write a release recap that only duplicates a release bot unless there is no + deeper evidence-backed angle. + +## Analysis Modes + +Use exactly one primary mode: + +| Mode | Use when | Output | +| --- | --- | --- | +| `release_pulse` | The release headline is the story and evidence is thin. | Short awareness note or social draft. | +| `delta_explainer` | Compare commits map to existing signals or clear PRs. | Refresh existing `release_delta/v1` and summarize the evidence. | +| `operator_impact` | Release changes app-server, plugins, browser, MCP, permissions, sandbox, hooks, config, auth, or providers. | `upstream_impact/v1` plus possible follow-up issue. | +| `watch_note` | The release is interesting but evidence is incomplete. | Watch note with caveats. 
| + +## Style Lessons + +- Release-bot style is useful for speed: version, three bullets, source link. +- Human analysis style is useful for value: what changes in a real workflow, why it + matters, what to try, and where the limit remains. +- Decodex should prefer the human-analysis shape whenever source evidence supports it. + +## Output + +Return: + +- release source and timestamp +- whether the release body is explanatory or sparse +- compare or PR evidence used +- matching Decodex signal slugs, if any +- chosen mode +- user-facing takeaway +- Control Plane impact, if any +- Publisher recommendation: no post, `release_pulse`, `practical_explainer`, + `operator_impact`, or `watch_note` + +Promote durable conclusions into existing artifacts only: `upstream_impact/v1`, +`analysis_draft` plus rendered `signal_entry/v1`, refreshed `release_delta/v1`, or +`social_post_draft/v1`. diff --git a/dev/skills/codex-upstream-triage/SKILL.md b/dev/skills/codex-upstream-triage/SKILL.md new file mode 100644 index 00000000..4ee8ce73 --- /dev/null +++ b/dev/skills/codex-upstream-triage/SKILL.md @@ -0,0 +1,93 @@ +--- +name: codex-upstream-triage +description: Use when scanning latest upstream OpenAI Codex commits, PRs, releases, or changelog entries to decide which items deserve a GitHub bundle, code analysis, upstream-impact classification, site signal, or social draft. +--- + +# Decodex Codex Upstream Triage + +Use this skill before deep analysis. Its job is to keep Radar fast and selective: find +candidate upstream Codex changes, group them correctly, and choose the next artifact. + +This is a Decodex repository-development instruction surface, not an installable +Decodex plugin skill. 
+ +## Read Before Triage + +- `docs/spec/github-change-bundle.md` +- `docs/spec/upstream-impact.md` +- `docs/runbook/local-github-signal-workflow.md` +- `dev/skills/codex-code-analysis/SKILL.md` +- `dev/skills/codex-release-analysis/SKILL.md` + +## Inputs + +- Upstream repository, normally `openai/codex` +- A time window, release tag, PR number, commit SHA, or changelog URL +- Optional existing Decodex signal or release-delta artifacts + +## Retrieval Order + +Use the lightest source that can answer the triage question: + +1. GitHub release or compare metadata when the user asks about a release. +2. GitHub PR metadata when a PR number is known. +3. GitHub commit metadata when only a SHA is known. +4. Upstream changelog or browser observation when the question is about public product + framing. + +For a latest-commit pass, list recent upstream commits first, then resolve promising +commits back to PRs before building bundles. A commit list is a queue, not final +evidence. + +## Candidate Ladder + +Classify each item as exactly one: + +| Decision | Meaning | Next step | +| --- | --- | --- | +| `skip` | Internal churn, no safe user or Decodex implication. | Record nothing durable. | +| `watch` | Interesting but too weak, too hidden, or too broad. | Optional `upstream_impact/v1` with `control_plane_impact = "watch"`. | +| `bundle` | Enough GitHub context exists for code analysis. | Build or reuse a `github_change_bundle/v1`. | +| `release_review` | Release or changelog framing needs comparison against commits and signals. | Use `codex-release-analysis`. | +| `style_reference` | Useful only as style or audience evidence. | Save no technical artifact; use only as optional style context when a separate source-backed draft exists. | + +## Grouping Rules + +- Prefer PR-first grouping over individual commit grouping whenever a commit maps to a + merged PR. +- Group adjacent commits when they share the same PR, feature area, or release note. 
+- Do not split a multi-commit PR into separate signals unless the PR clearly ships + multiple independently useful user paths. +- Treat sparse release bodies such as a title-only prerelease as an index into commits, + not as enough evidence for `confirmed` claims. + +## Radar Triggers + +Escalate to `codex-code-analysis` when changed files or release text mention: + +- app-server, app-server protocol, remote control, or websocket transport +- plugins, MCP, tool search, browser automation, or Chrome integration +- sandboxing, permissions, approval policy, hooks, or config schemas +- model providers, auth, accounts, or rate-limit behavior +- CLI/TUI behavior visible to a normal Codex user + +Escalate to `codex-release-analysis` when the source is a release, prerelease, app +update, or public changelog. + +Escalate to `x-post-draft` only after there is technical source evidence and a clear +Publisher angle. Style references from X must not start a social draft by themselves. + +## Output + +Return a compact triage note with: + +- source URLs and timestamps +- grouped candidate IDs +- triage decision for each group +- why skipped items were skipped +- next skill to use +- confidence limits + +Do not draft `signal_entry/v1` or `social_post_draft/v1` directly from this skill. +Do not treat this note as a durable repository artifact unless a later change adds a +schema, path, and validator for it. diff --git a/dev/skills/github-signal/SKILL.md b/dev/skills/github-signal/SKILL.md index e31ac5e1..4a7f90af 100644 --- a/dev/skills/github-signal/SKILL.md +++ b/dev/skills/github-signal/SKILL.md @@ -1,18 +1,19 @@ --- name: github-signal -description: Use when turning a normalized GitHub bundle under `artifacts/github/bundles/` into a Decodex signal draft, especially for requests to analyze a PR-first bundle, decide if a change is signal-worthy, or write/update the local editorial analysis JSON that feeds `scripts/github/render_signal_entry.py`. 
+description: Use when turning a reviewed GitHub bundle and code-analysis result into a Decodex signal draft, especially for writing or updating the local editorial analysis JSON that feeds `scripts/github/render_signal_entry.py`. --- # Decodex GitHub Signal -Use this skill for the local editorial step in the GitHub-first Decodex workflow. +Use this skill for the final local editorial step in the GitHub-first Decodex workflow. This is a Decodex repository-development instruction surface, not a complete user-facing plugin skill, and it must not be packaged with the installable Decodex plugin. This skill does not replace the deterministic scripts. It tells Codex how to read a -bundle, decide whether it deserves publication, and draft the analysis JSON that the -repo already renders into a final `signal_entry/v1`. +reviewed bundle and in-session code-analysis result, decide whether the change deserves +publication, and draft the analysis JSON that the repo already renders into a final +`signal_entry/v1`. ## Read before drafting @@ -21,13 +22,27 @@ repo already renders into a final `signal_entry/v1`. - `docs/spec/signal-entry.md` - `docs/spec/social-post-draft.md` - `docs/runbook/local-github-signal-workflow.md` +- `dev/skills/codex-upstream-triage/SKILL.md` +- `dev/skills/codex-code-analysis/SKILL.md` ## Inputs - A normalized bundle JSON under `artifacts/github/bundles/` +- A code-analysis result from `dev/skills/codex-code-analysis/SKILL.md`, when the + behavior path is not already clear from the bundle - An output path under `artifacts/github/analysis/` - Optional upstream impact output under `artifacts/github/impact/` +## Companion Skill Routing + +- Use `codex-upstream-triage` before this skill when the candidate still needs to be + selected from latest commits, PRs, releases, or changelog entries. +- Use `codex-code-analysis` before this skill when the behavior path or Control Plane + impact is not already clear. 
+- Use `codex-release-analysis` before this skill when the source is release-shaped. +- Use `x-post-draft` after this skill only when the rendered signal or upstream-impact + artifact supports a social draft. + ## Boundaries - Treat the PR as the main narrative container. @@ -126,10 +141,11 @@ Write a JSON analysis draft with these fields: ## Workflow 1. Validate the bundle first. -2. Read `primary_pr.title`, `primary_pr.body`, `files`, and `commits`. +2. Read `primary_pr.title`, `primary_pr.body`, `files`, `commits`, and the companion + in-session code-analysis result when one was produced. 3. Decide whether the change is signal-worthy. -4. Draft the editorial JSON under `artifacts/github/analysis/`. -5. Draft an `upstream_impact/v1` artifact when the change affects Control Plane or +4. Draft the `analysis_draft` JSON under `artifacts/github/analysis/`. +5. Draft or update an `upstream_impact/v1` artifact when the change affects Control Plane or Publisher follow-up. 6. Render the final signal entry with the repo script. 7. Validate the published signal collection and site build. diff --git a/dev/skills/x-post-draft/SKILL.md b/dev/skills/x-post-draft/SKILL.md new file mode 100644 index 00000000..76b12fd9 --- /dev/null +++ b/dev/skills/x-post-draft/SKILL.md @@ -0,0 +1,83 @@ +--- +name: x-post-draft +description: Use when turning Decodex Radar evidence, upstream-impact classifications, signal entries, release analysis, or verified browser style observations into a checked-in social_post_draft/v1 artifact for X. +--- + +# Decodex X Post Draft + +Use this skill after source evidence exists. Its job is to create reviewable X draft +artifacts, not to publish posts. + +This is a Decodex repository-development instruction surface, not an installable +Decodex plugin skill. 
+ +## Read Before Drafting + +- `docs/spec/social-post-draft.md` +- `docs/spec/upstream-impact.md` +- `docs/runbook/social-publishing-workflow.md` +- `dev/skills/codex-release-analysis/SKILL.md` +- `dev/skills/codex-code-analysis/SKILL.md` + +## Inputs + +- `signal_entry/v1`, `upstream_impact/v1`, release-analysis note, or checked source URLs +- Optional style observations from `@Codex_Changelog`, `@LLMJunky`, or `@decodexspace` +- Target account, normally `decodexspace` + +## Browser Boundary + +Use `@Chrome` only for reading public pages or verifying rendered posts. Do not type +into an X composer, save a web draft, or post externally unless the user explicitly +approves the specific draft and the artifact is already `status = "approved"`. + +Style observations from X are not technical evidence. They can shape format and tone, +but every technical claim must point back to GitHub, changelog, signal, or +upstream-impact evidence. + +## Benchmark Patterns + +Use these as format patterns only: + +| Pattern | Good for | Decodex adaptation | +| --- | --- | --- | +| Release-bot bullet | Fast `release_pulse` posts. | Version or source headline, two or three evidence-backed bullets, source link. | +| Human workflow read | `practical_explainer` and `operator_impact`. | Start with the concrete workflow change, then explain why it matters and what caveat remains. | +| Watch note | Interesting but incomplete evidence. | Say what changed, why Radar is watching, and what evidence is still missing. | + +## Draft Modes + +Choose exactly one `mode` from `social_post_draft/v1`: + +- `release_pulse`: short release-aware summary with source link. +- `practical_explainer`: concrete user workflow and expected result. +- `operator_impact`: Decodex Control Plane implication. +- `thread`: multi-post explanation when one post hides evidence or caveats. +- `watch_note`: cautious public note for incomplete evidence. 
+ +`@decodexspace` should mostly use `practical_explainer` and `operator_impact`. +Use `release_pulse` only when the release itself is the useful alert. + +## Claim Review + +Before writing the artifact: + +- Map every sentence to evidence. +- Remove claims based only on social posts or engagement. +- Make beta, rollout, platform, and config gates explicit. +- Avoid local paths, credentials, private issue details, or internal runtime state. +- Keep each `text[]` item within the X length limit. + +## Output + +Write or propose `artifacts/social/x/<slug>.json` with: + +- `schema = "social_post_draft/v1"` +- `channel = "x"` +- `target_account = "decodexspace"` unless requested otherwise +- `status = "draft"` +- `source_refs`, `evidence_notes`, and `claims` +- `caveats` when confidence is not fully confirmed + +Do not update the artifact to `approved` or `published` from this skill unless the user +explicitly asks for that state change. diff --git a/docs/index.md b/docs/index.md index 0f4f3536..a26d3e3f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -43,8 +43,9 @@ The split below is by question type, not by human-versus-agent audience.
- Need the raw machine-authored research run artifacts used by shipped research tooling -> `docs/research/` - Need reusable agent-facing Decodex usage instructions -> `plugins/decodex/` -- Need the repo-local GitHub signal editorial workflow -> `dev/skills/github-signal/` - plus `docs/runbook/local-github-signal-workflow.md` +- Need repo-local Radar skills for upstream Codex triage, code analysis, release + analysis, GitHub signal drafting, or X post drafting -> `dev/skills/` plus + `docs/runbook/local-github-signal-workflow.md` - Need upstream Codex impact classification or social post draft contracts -> `docs/spec/upstream-impact.md` and `docs/spec/social-post-draft.md` - Need the `@decodexspace` social publishing procedure -> diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index c603f42c..b76a7019 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -22,7 +22,7 @@ should not be treated as repository source. | `scripts/config/` | Repository automation scripts for config-derived artifacts. | | `artifacts/github/` | Checked-in GitHub change bundles and editorial analysis drafts used by the public signal pipeline. | | `artifacts/social/` | Checked-in Publisher social post drafts and publication evidence. | -| `dev/skills/` | Repository-development skill-like instructions that are not part of installable plugin distribution. | +| `dev/skills/` | Repository-development skills for Radar upstream triage, code analysis, release analysis, GitHub signal drafting, and X post drafting. These are not part of installable plugin distribution. | | `plugins/decodex/` | Canonical installable Decodex plugin source and reusable agent-facing skills, including manual CLI, automation, commit, land, and labels. | | `docs/spec/` | Normative runtime, workflow, site, and content contracts. | | `docs/runbook/` | Operator procedures, validation sequences, deployment steps, and content workflows. 
| @@ -74,10 +74,12 @@ Those runtime and operator surfaces stay in `apps/decodex/` and `docs/spec/`. ## GitHub signal tooling -`scripts/github/` owns deterministic content scripts. It may call Codex for the -editorial drafting step through the repo-local instructions at -`dev/skills/github-signal/`, but that surface is not part of the installable Decodex -plugin distribution. Generated GitHub bundles and analysis drafts live under +`scripts/github/` owns deterministic content scripts. Its automated Codex step may +apply the repo-local code-analysis and GitHub-signal instructions under `dev/skills/` +to produce the existing `analysis_draft` JSON consumed by `render_signal_entry.py`. +The broader upstream triage, release-analysis, and X-drafting skills are manual +Radar/Publisher reasoning surfaces unless a later script explicitly wires them into a +checked-in contract. Generated GitHub bundles and analysis drafts live under `artifacts/github/` and must stay explicit and checked into the repository. 
`artifacts/github/impact/` may hold `upstream_impact/v1` classifications when an diff --git a/docs/research/2026-05-13_codex-radar-skill-split.json b/docs/research/2026-05-13_codex-radar-skill-split.json new file mode 100644 index 00000000..4fcfd697 --- /dev/null +++ b/docs/research/2026-05-13_codex-radar-skill-split.json @@ -0,0 +1,243 @@ +{ + "schema": "research-run/2", + "run_id": "2026-05-13_codex-radar-skill-split", + "question": "How should Decodex read upstream Codex code, latest commits, releases, and public posting patterns so Radar output can improve Control Plane and Publisher work without overclaiming?", + "success_criteria": [ + "Separate fast upstream triage from code-aware analysis and final signal drafting.", + "Preserve current deterministic artifact boundaries for GitHub bundles, analysis drafts, rendered signals, upstream-impact classifications, release deltas, and social drafts.", + "Use X account observations as style benchmarks only, not technical evidence.", + "End with a skill split that can be used manually now and can be wired into automation incrementally." + ], + "constraints": [ + "Do not make X posts or operate an X composer from the research pass.", + "Do not introduce new durable intermediate artifact schemas unless validators and gates are added in the same change.", + "Do not imply sparse upstream release bodies prove product behavior.", + "Keep installable Decodex plugin skills separate from repo-local development skills under dev/skills/." 
+ ], + "stop_rule": "Stop once the skill routing decision is clear enough to update repo-local skills, docs, and the current Codex analysis prompt without changing the social publication approval boundary.", + "primary_hypothesis": "Decodex should split Radar work into repo-local reasoning skills for upstream triage, code analysis, release analysis, and X drafting, while keeping github-signal as the final analysis_draft authoring step and avoiding new intermediate artifacts.", + "rival_hypotheses": [ + "Keep one large github-signal skill for the whole workflow.", + "Create first-class checked-in triage, code-analysis, and release-analysis artifacts immediately.", + "Treat release posts and X posts as sufficient source material for public Decodex claims." + ], + "falsifiers": [ + "If the existing automation already consumes separate triage, code-analysis, and release-analysis artifacts, the manual-only split would understate current behavior.", + "If X style observations can produce a valid social_post_draft/v1 without GitHub, changelog, signal, or upstream-impact evidence, the evidence boundary would be too strict.", + "If sparse release bodies contain enough detail to support confirmed claims, the compare-first release rule would be too conservative." + ], + "coverage": { + "mode": "standard", + "min_source_families": 0 + }, + "continuation": { + "mode": "manual_if_not_decision_ready", + "attempt": 1, + "max_attempts": 1, + "session_id": "2026-05-13_codex-radar-skill-split" + }, + "events": [ + { + "seq": 1, + "type": "probe_completed", + "remaining_option_count": 3, + "independent_option_questions": [ + "Should analysis be one skill or a small staged set of repo-local skills?", + "Should intermediate triage and code-analysis outputs become durable artifacts now?", + "How should public X posting style benchmarks influence Decodex output without becoming source evidence?" 
+ ], + "external_slices": [ + "Latest openai/codex GitHub commits and release metadata.", + "Public X account posting patterns for @Codex_Changelog, @LLMJunky, and @decodexspace." + ] + }, + { + "seq": 2, + "type": "evidence_recorded", + "evidence": [ + { + "id": "E1", + "kind": "observation", + "summary": "The latest upstream Codex commits observed on 2026-05-13 included tool-search handler encapsulation, app-server websocket listener restoration with an auth guard, plugin version/share gating, hook requirement changes, provider routing, and cleanup of an SSE fixture hook.", + "source_family": "github_api", + "source_locator": "https://api.github.com/repos/openai/codex/commits?per_page=8" + }, + { + "id": "E2", + "kind": "observation", + "summary": "Commit 51bfb5f3b115 restored an app-server websocket listener with an auth guard and touched app-server transport/auth code, which is relevant to Decodex Control Plane integration.", + "source_family": "github_api", + "source_locator": "https://github.com/openai/codex/commit/51bfb5f3b115" + }, + { + "id": "E3", + "kind": "observation", + "summary": "Commit d1430fd61e4a exposed plugin versions and gated plugin sharing, touching app-server protocol schemas and plugin summary/share surfaces.", + "source_family": "github_api", + "source_locator": "https://github.com/openai/codex/commit/d1430fd61e4a" + }, + { + "id": "E4", + "kind": "observation", + "summary": "Commit 104fc1495646 encapsulated tool-search entries in handlers and touched tool-search handler and registry code, a likely Radar trigger but still requiring code-path analysis before publication.", + "source_family": "github_api", + "source_locator": "https://github.com/openai/codex/commit/104fc1495646" + }, + { + "id": "E5", + "kind": "observation", + "summary": "Recent rust-v0.131.0 alpha GitHub release bodies were title-like and sparse, so release metadata alone is insufficient evidence for confirmed feature claims.", + "source_family": "github_release_api", + 
"source_locator": "https://api.github.com/repos/openai/codex/releases" + }, + { + "id": "E6", + "kind": "observation", + "summary": "@Codex_Changelog uses fast automated release-bullet posts: version headline, a few feature bullets, and source links. This is useful for release_pulse formatting but not enough for deeper Decodex claims.", + "source_family": "public_x_observation", + "source_locator": "https://x.com/Codex_Changelog" + }, + { + "id": "E7", + "kind": "observation", + "summary": "@LLMJunky posts more practical Codex workflow analysis, focusing on what a user can try, why it matters in real usage, and where the limitation remains.", + "source_family": "public_x_observation", + "source_locator": "https://x.com/LLMJunky" + }, + { + "id": "E8", + "kind": "observation", + "summary": "@decodexspace had no observed posts in the public page pass, so Decodex can define its own evidence-backed style rather than preserving existing account conventions.", + "source_family": "public_x_observation", + "source_locator": "https://x.com/decodexspace" + }, + { + "id": "E9", + "kind": "observation", + "summary": "The current automation path discovers recent merged PRs with title scoring, builds a GitHub bundle, runs Codex to produce an analysis_draft JSON, renders a signal_entry/v1, validates signal entries, and refreshes release_delta/v1.", + "source_family": "decodex_repo", + "source_path": "scripts/github/sync_latest_signals.py; scripts/github/run_codex_analysis.py; scripts/github/render_signal_entry.py" + }, + { + "id": "E10", + "kind": "observation", + "summary": "The durable content contracts currently present for this workflow are GitHub bundles, analysis drafts, rendered signal entries, upstream-impact classifications, release deltas, and social post drafts; no triage-note or code-analysis-note schema is present.", + "source_family": "decodex_repo", + "source_path": "docs/spec/github-change-bundle.md; scripts/github/analysis_draft.schema.json; 
docs/spec/signal-entry.md; docs/spec/upstream-impact.md; docs/spec/release-delta.md; docs/spec/social-post-draft.md" + } + ] + }, + { + "seq": 3, + "type": "tradeoffs_recorded", + "tradeoffs": [ + { + "id": "T1", + "summary": "Splitting the workflow into staged repo-local skills improves selectivity and lets Radar distinguish commit triage, behavior reading, release interpretation, final signal drafting, and public post drafting.", + "supporting_evidence_ids": [ + "E1", + "E2", + "E3", + "E4", + "E9" + ], + "disconfirming_evidence_ids": [] + }, + { + "id": "T2", + "summary": "Creating durable triage or code-analysis artifacts now would add contract and gate obligations that the current scripts do not satisfy.", + "supporting_evidence_ids": [ + "E9", + "E10" + ], + "disconfirming_evidence_ids": [] + }, + { + "id": "T3", + "summary": "Release analysis should compare sparse release metadata against commits, PRs, existing signals, and release_delta/v1 rather than duplicating release-bot summaries.", + "supporting_evidence_ids": [ + "E5", + "E6", + "E9" + ], + "disconfirming_evidence_ids": [] + }, + { + "id": "T4", + "summary": "X observations are useful for cadence, structure, and tone, but all Decodex technical claims still need GitHub, changelog, signal, or upstream-impact evidence.", + "supporting_evidence_ids": [ + "E6", + "E7", + "E8", + "E10" + ], + "disconfirming_evidence_ids": [] + } + ] + }, + { + "seq": 4, + "type": "judgment_candidate_created", + "judgment_payload": { + "judgment_type": "recommend", + "preferred_option": "split-repo-local-radar-skills-with-existing-artifact-boundaries", + "rejected_options": [ + "single-large-github-signal-skill", + "new-durable-intermediate-artifacts-now", + "social-or-release-posts-as-technical-source-evidence" + ], + "decision_claim": "Split Decodex Radar into manual reasoning skills for upstream triage, code analysis, release analysis, and X drafting; keep durable outputs limited to existing checked-in contracts; wire 
only in-session code-analysis into run_codex_analysis prompt for current automation.", + "key_evidence_ids": [ + "E1", + "E5", + "E6", + "E7", + "E9", + "E10" + ], + "key_tradeoff_ids": [ + "T1", + "T2", + "T3", + "T4" + ] + }, + "judgment_hash": "sha256:a100c2d386d45be8039eaa5aba398de72287d58e95258cef87df6fb224179d3a" + }, + { + "seq": 5, + "type": "worker_completed", + "worker": "skeptic", + "target_judgment_hash": "sha256:a100c2d386d45be8039eaa5aba398de72287d58e95258cef87df6fb224179d3a", + "summary": "The split is useful only if it preserves artifact boundaries, avoids undocumented intermediate outputs, does not imply current automation already orchestrates all new skills, and blocks style-only social drafting.", + "objections": [ + { + "id": "OBJ-001-artifact-seam-drift", + "summary": "Keep github-signal scoped to analysis_draft authoring, not direct signal_entry/v1 production." + }, + { + "id": "OBJ-002-missing-intermediate-artifact-contracts", + "summary": "Either keep triage/code/release notes non-durable or add paths, schemas, validators, and gates." + }, + { + "id": "OBJ-003-automation-doc-drift", + "summary": "Docs must not imply current scripts already orchestrate all new skills unless scripts are wired." + }, + { + "id": "OBJ-004-style-only-social-bypass", + "summary": "A style-only X observation must not lead directly to a social_post_draft/v1." + }, + { + "id": "OBJ-005-release-analysis-overlap", + "summary": "Release analysis must feed existing upstream_impact, signal, release_delta, or social draft contracts instead of creating duplicate judgment." 
+ } + ] + }, + { + "seq": 6, + "type": "finalized_decision_ready", + "judgment_hash": "sha256:a100c2d386d45be8039eaa5aba398de72287d58e95258cef87df6fb224179d3a", + "summary": "Decision-ready: split the repo-local Radar skills, keep triage/code/release outputs as non-durable reasoning passes, wire in-session code-analysis into the current analysis_draft prompt, and require source-backed evidence before X drafting." + } + ] +} diff --git a/docs/runbook/local-github-signal-workflow.md b/docs/runbook/local-github-signal-workflow.md index dabd9281..fad1d760 100644 --- a/docs/runbook/local-github-signal-workflow.md +++ b/docs/runbook/local-github-signal-workflow.md @@ -18,6 +18,7 @@ Depends on: - `docs/spec/signal-entry.md` - `docs/spec/release-delta.md` - `docs/spec/site-contract.md` +- `dev/skills/README.md` Outputs: - A validated signal entry committed to the repo @@ -27,17 +28,24 @@ Outputs: ## Workflow -1. Build a normalized GitHub change bundle under `artifacts/github/bundles/`. -2. Review the bundle and decide whether the change is signal-worthy. -3. Run Codex analysis against the bundle with the repo-local instructions at `dev/skills/github-signal/` and save the editorial draft JSON under `artifacts/github/analysis/`. -4. Render the resulting signal entry into `site/src/content/signals/`. -5. Validate the signal entry shape and collection consistency. -6. Classify upstream impact when the change may affect Control Plane or Publisher. -7. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. -8. Draft optional social publishing content only through +1. Triage upstream Codex activity with `dev/skills/codex-upstream-triage/` when the + candidate is not already chosen by automation or by the operator. +2. Build a normalized GitHub change bundle under `artifacts/github/bundles/` for + selected candidates. +3. 
Analyze source behavior with `dev/skills/codex-code-analysis/` as an in-session + reasoning pass; do not create a separate checked-in artifact for this pass. +4. Use `dev/skills/codex-release-analysis/` when the source is a release, prerelease, + app update, or changelog entry. +5. Run final signal drafting with `dev/skills/github-signal/` and save the + `analysis_draft` JSON under `artifacts/github/analysis/`. +6. Render the resulting signal entry into `site/src/content/signals/`. +7. Validate the signal entry shape and collection consistency. +8. Classify upstream impact when the change may affect Control Plane or Publisher. +9. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. +10. Draft optional social publishing content only through [`social-publishing-workflow.md`](./social-publishing-workflow.md). -9. Review the rendered content manually in the homepage feed. -10. Push the content update and let CI build and deploy the static site. +11. Review the rendered content manually in the homepage feed. +12. Push the content update and let CI build and deploy the static site. ## Deterministic commands @@ -93,10 +101,13 @@ The repository already includes a real sample for this flow: Repo-local editorial instruction entrypoint: -- `dev/skills/github-signal/SKILL.md` +- `dev/skills/README.md` -This entrypoint is for Decodex repository development only. It is incomplete as a -general user-facing skill and must not be packaged with the installable Decodex plugin. +These entrypoints are for Decodex repository development only. They are incomplete as +general user-facing skills and must not be packaged with the installable Decodex +plugin. Today only `github_change_bundle/v1`, `analysis_draft`, `signal_entry/v1`, +`upstream_impact/v1`, `release_delta/v1`, and `social_post_draft/v1` are durable +content contracts for this workflow. 
Automated hourly sync entrypoint: diff --git a/docs/runbook/social-publishing-workflow.md b/docs/runbook/social-publishing-workflow.md index 5a15d216..262a2c11 100644 --- a/docs/runbook/social-publishing-workflow.md +++ b/docs/runbook/social-publishing-workflow.md @@ -19,6 +19,8 @@ Inputs: Depends on: - [`local-github-signal-workflow.md`](./local-github-signal-workflow.md) for the GitHub signal path. +- [`../../dev/skills/x-post-draft/SKILL.md`](../../dev/skills/x-post-draft/SKILL.md) + for the repo-local drafting method. - [`../decisions/radar-control-plane-publisher.md`](../decisions/radar-control-plane-publisher.md) for the Radar, Control Plane, and Publisher boundary. - [`../decisions/static-public-site.md`](../decisions/static-public-site.md) for the @@ -60,6 +62,7 @@ for technical claims. vague for a useful reader takeaway. 4. Create a checked-in draft. + - Use `dev/skills/x-post-draft/SKILL.md`. - Write `artifacts/social/x/.json`. - Use `schema = "social_post_draft/v1"`. - Keep `status = "draft"` until approval. diff --git a/scripts/github/README.md b/scripts/github/README.md index bf42d77e..55a56e55 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -45,3 +45,8 @@ These scripts stay deterministic on purpose. Local Codex analysis produces the editorial draft JSON consumed by `render_signal_entry.py`. Trusted automation may invoke the Codex analysis step as long as `auth.json` is injected into `CODEX_HOME` and no credentials are logged or persisted into the repo. + +Repo-local skills under `dev/skills/` are reasoning instructions for the Codex +analysis step and for manual Radar/Publisher work. They do not introduce extra +intermediate artifact schemas unless the conclusion is promoted into one of the +checked-in contracts listed above. 
diff --git a/scripts/github/run_codex_analysis.py b/scripts/github/run_codex_analysis.py index e5298c3d..65e86582 100644 --- a/scripts/github/run_codex_analysis.py +++ b/scripts/github/run_codex_analysis.py @@ -41,6 +41,8 @@ def build_prompt(bundle_path: Path, repo_root: Path) -> str: return "\n".join( [ "Read and follow these repo-local instructions before drafting:", + "- dev/skills/README.md", + "- dev/skills/codex-code-analysis/SKILL.md", "- dev/skills/github-signal/SKILL.md", "- docs/spec/github-change-bundle.md", "- docs/spec/signal-entry.md", @@ -49,6 +51,8 @@ def build_prompt(bundle_path: Path, repo_root: Path) -> str: f"Analyze the bundle at `{relative_bundle}`.", "", "Return exactly one JSON object matching the provided output schema.", + "Use the code-analysis skill as the in-session behavior-reading pass.", + "Do not invent a separate checked-in code-analysis artifact.", "Treat the pull request as the main narrative container and the commits/files as evidence.", "Do not summarize every commit independently.", "Keep the output publishable for Decodex: concise, user-facing, and evidence-backed.", From 13cfb2bf7ad0d9b61cb9f98e21b5e1efc99ede88 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 12:36:59 +0800 Subject: [PATCH 3/6] {"schema":"decodex/commit/1","summary":"Add prerelease-first Codex signal sync","authority":"manual"} --- README.md | 3 + dev/skills/README.md | 4 + dev/skills/codex-release-analysis/SKILL.md | 25 ++ dev/skills/codex-upstream-triage/SKILL.md | 17 +- docs/reference/workspace-layout.md | 9 +- docs/runbook/local-github-signal-workflow.md | 55 +++- scripts/github/README.md | 14 +- scripts/github/backfill_release_range.py | 252 +++++++++++------- scripts/github/sync_prerelease_signals.py | 75 ++++++ scripts/github/test_backfill_release_range.py | 103 +++++++ 10 files changed, 446 insertions(+), 111 deletions(-) create mode 100644 scripts/github/sync_prerelease_signals.py create mode 100644 
scripts/github/test_backfill_release_range.py diff --git a/README.md b/README.md index 7449fb0c..1df7ab12 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,9 @@ The GitHub-first public signal path stays deterministic and reviewable: are not part of the installable Decodex plugin distribution. - `scripts/github/sync_latest_signals.py` discovers recent merged PRs and refreshes content artifacts. +- `scripts/github/sync_prerelease_signals.py` starts from the latest stable-to-prerelease + compare so Decodex can explain Codex prereleases even when upstream release notes are + sparse. - `docs/spec/upstream-impact.md` records how upstream Codex changes are classified for public signals and Control Plane follow-up work. - `scripts/github/render_signal_entry.py` renders reviewed analysis drafts into site diff --git a/dev/skills/README.md b/dev/skills/README.md index d8d78409..ad774dfc 100644 --- a/dev/skills/README.md +++ b/dev/skills/README.md @@ -25,6 +25,10 @@ follow-up work: Use only the skills needed for the current artifact. Do not create a social draft just because a signal exists. +For OpenAI Codex prereleases, start with `codex-release-analysis` and the latest +stable-to-prerelease compare instead of a generic recent-commit scan. Codex prerelease +notes are often too sparse to explain what changed. + Only the existing checked-in contracts are durable artifacts today: `github_change_bundle/v1`, `analysis_draft`, `signal_entry/v1`, `upstream_impact/v1`, `release_delta/v1`, and `social_post_draft/v1`. The triage, code-analysis, and diff --git a/dev/skills/codex-release-analysis/SKILL.md b/dev/skills/codex-release-analysis/SKILL.md index 21cba5af..67477341 100644 --- a/dev/skills/codex-release-analysis/SKILL.md +++ b/dev/skills/codex-release-analysis/SKILL.md @@ -33,6 +33,8 @@ generation. ## Release Reading Rules +- Treat Codex prereleases as a primary Radar source because their release bodies may be + empty or title-only. 
- Treat release notes as discovery, not proof, when they are sparse. - Use GitHub compare data and PR mappings to explain what changed between stable and prerelease tags. @@ -43,6 +45,26 @@ generation. - Do not write a release recap that only duplicates a release bot unless there is no deeper evidence-backed angle. +## Codex Prerelease-First Path + +When the target is an OpenAI Codex prerelease: + +1. Refresh or read `release_delta/v1`. +2. Select the top-level `stable_release` -> `prerelease` comparison unless the user + asks for a specific tag pair. +3. Use `compare.pr_numbers` and `compare.commit_shas` as the discovery queue. +4. Remove PRs that already have published `signal_entry/v1` coverage. +5. Prioritize the remaining PRs by Radar triggers: app-server/protocol, plugins, MCP, + browser/Chrome, tool search, hooks, permissions, sandboxing, config, auth, + providers, and visible CLI/TUI behavior. +6. Build PR-first bundles for the selected unpublished PRs and run + `codex-code-analysis` before `github-signal`. +7. Refresh `release_delta/v1` after new signals are rendered so the homepage can map + prerelease deltas to the new tracked signals. + +Use `scripts/github/sync_prerelease_signals.py` for the default latest-prerelease +automation path. + ## Analysis Modes Use exactly one primary mode: @@ -54,6 +76,9 @@ Use exactly one primary mode: | `operator_impact` | Release changes app-server, plugins, browser, MCP, permissions, sandbox, hooks, config, auth, or providers. | `upstream_impact/v1` plus possible follow-up issue. | | `watch_note` | The release is interesting but evidence is incomplete. | Watch note with caveats. | +For sparse Codex prereleases, prefer `delta_explainer` or `operator_impact` over +`release_pulse`; the release version alone is rarely the useful story. + ## Style Lessons - Release-bot style is useful for speed: version, three bullets, source link. 
diff --git a/dev/skills/codex-upstream-triage/SKILL.md b/dev/skills/codex-upstream-triage/SKILL.md index 4ee8ce73..537d74be 100644 --- a/dev/skills/codex-upstream-triage/SKILL.md +++ b/dev/skills/codex-upstream-triage/SKILL.md @@ -29,15 +29,20 @@ Decodex plugin skill. Use the lightest source that can answer the triage question: -1. GitHub release or compare metadata when the user asks about a release. +1. GitHub release-delta or compare metadata when the user asks about a Codex release + or prerelease. 2. GitHub PR metadata when a PR number is known. 3. GitHub commit metadata when only a SHA is known. 4. Upstream changelog or browser observation when the question is about public product framing. -For a latest-commit pass, list recent upstream commits first, then resolve promising -commits back to PRs before building bundles. A commit list is a queue, not final -evidence. +For a latest Codex pass, prefer the latest stable-to-prerelease compare before a +generic recent-commit scan. Prerelease bodies are often sparse, so the compare PR and +commit set is the useful queue. + +For a latest-commit pass outside a release window, list recent upstream commits first, +then resolve promising commits back to PRs before building bundles. A commit list is a +queue, not final evidence. ## Candidate Ladder @@ -49,6 +54,7 @@ Classify each item as exactly one: | `watch` | Interesting but too weak, too hidden, or too broad. | Optional `upstream_impact/v1` with `control_plane_impact = "watch"`. | | `bundle` | Enough GitHub context exists for code analysis. | Build or reuse a `github_change_bundle/v1`. | | `release_review` | Release or changelog framing needs comparison against commits and signals. | Use `codex-release-analysis`. | +| `prerelease_delta` | A sparse Codex prerelease needs compare-driven reconstruction. | Use `codex-release-analysis`, then build bundles for unpublished compare PRs. | | `style_reference` | Useful only as style or audience evidence. 
| Save no technical artifact; use only as optional style context when a separate source-backed draft exists. | ## Grouping Rules @@ -72,7 +78,8 @@ Escalate to `codex-code-analysis` when changed files or release text mention: - CLI/TUI behavior visible to a normal Codex user Escalate to `codex-release-analysis` when the source is a release, prerelease, app -update, or public changelog. +update, or public changelog. For Codex prereleases, default to `prerelease_delta` +unless the release body is already explanatory enough to stand on its own. Escalate to `x-post-draft` only after there is technical source evidence and a clear Publisher angle. Style references from X must not start a social draft by themselves. diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index b76a7019..c8c893ca 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -77,9 +77,12 @@ Those runtime and operator surfaces stay in `apps/decodex/` and `docs/spec/`. `scripts/github/` owns deterministic content scripts. Its automated Codex step may apply the repo-local code-analysis and GitHub-signal instructions under `dev/skills/` to produce the existing `analysis_draft` JSON consumed by `render_signal_entry.py`. -The broader upstream triage, release-analysis, and X-drafting skills are manual -Radar/Publisher reasoning surfaces unless a later script explicitly wires them into a -checked-in contract. Generated GitHub bundles and analysis drafts live under +`sync_prerelease_signals.py` is the prerelease-first entrypoint: it refreshes the +stable-to-prerelease release delta, selects unpublished compare PRs, and reuses the +existing bundle, analysis-draft, render, and validation path. The broader upstream +triage, release-analysis, and X-drafting skills remain manual Radar/Publisher +reasoning surfaces unless a script explicitly wires them into a checked-in contract. 
+Generated GitHub bundles and analysis drafts live under `artifacts/github/` and must stay explicit and checked into the repository. `artifacts/github/impact/` may hold `upstream_impact/v1` classifications when an diff --git a/docs/runbook/local-github-signal-workflow.md b/docs/runbook/local-github-signal-workflow.md index fad1d760..6c897846 100644 --- a/docs/runbook/local-github-signal-workflow.md +++ b/docs/runbook/local-github-signal-workflow.md @@ -28,24 +28,28 @@ Outputs: ## Workflow -1. Triage upstream Codex activity with `dev/skills/codex-upstream-triage/` when the - candidate is not already chosen by automation or by the operator. -2. Build a normalized GitHub change bundle under `artifacts/github/bundles/` for +1. For OpenAI Codex prereleases, start from the prerelease-first path because upstream + prerelease notes are often sparse: refresh `release_delta/v1`, compare the latest + stable tag to the latest prerelease tag, and use unpublished compare PRs as the + candidate queue. +2. Triage other upstream Codex activity with `dev/skills/codex-upstream-triage/` when + the candidate is not already chosen by automation or by the operator. +3. Build a normalized GitHub change bundle under `artifacts/github/bundles/` for selected candidates. -3. Analyze source behavior with `dev/skills/codex-code-analysis/` as an in-session +4. Analyze source behavior with `dev/skills/codex-code-analysis/` as an in-session reasoning pass; do not create a separate checked-in artifact for this pass. -4. Use `dev/skills/codex-release-analysis/` when the source is a release, prerelease, +5. Use `dev/skills/codex-release-analysis/` when the source is a release, prerelease, app update, or changelog entry. -5. Run final signal drafting with `dev/skills/github-signal/` and save the +6. Run final signal drafting with `dev/skills/github-signal/` and save the `analysis_draft` JSON under `artifacts/github/analysis/`. -6. Render the resulting signal entry into `site/src/content/signals/`. -7. 
Validate the signal entry shape and collection consistency. -8. Classify upstream impact when the change may affect Control Plane or Publisher. -9. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. -10. Draft optional social publishing content only through +7. Render the resulting signal entry into `site/src/content/signals/`. +8. Validate the signal entry shape and collection consistency. +9. Classify upstream impact when the change may affect Control Plane or Publisher. +10. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. +11. Draft optional social publishing content only through [`social-publishing-workflow.md`](./social-publishing-workflow.md). -11. Review the rendered content manually in the homepage feed. -12. Push the content update and let CI build and deploy the static site. +12. Review the rendered content manually in the homepage feed. +13. Push the content update and let CI build and deploy the static site. ## Deterministic commands @@ -93,6 +97,26 @@ python3 scripts/github/build_release_delta.py \ --out site/src/content/release-deltas/openai-codex-latest.json ``` +Sync unpublished signals from the latest prerelease compare: + +```bash +python3 scripts/github/sync_prerelease_signals.py \ + --repo openai/codex \ + --max-prs 3 +``` + +Preview the latest prerelease queue without generating content: + +```bash +python3 scripts/github/sync_prerelease_signals.py \ + --repo openai/codex \ + --dry-run +``` + +`sync_prerelease_signals.py --dry-run` refreshes the prerelease compare into a +temporary release-delta file, so it does not mutate checked-in content while listing +the queue. 
+ The repository already includes a real sample for this flow: - bundle: `artifacts/github/bundles/openai-codex-pr-15222.json` @@ -113,6 +137,10 @@ Automated hourly sync entrypoint: - `scripts/github/sync_latest_signals.py` +Prerelease-first sync entrypoint: + +- `scripts/github/sync_prerelease_signals.py` + ## Editorial gate Publish only when the change meets at least one of these tests: @@ -129,6 +157,7 @@ Skip or defer entries for: For the release-delta artifact: +- treat Codex prereleases as a high-value source even when release notes are empty - include only signals whose source commit SHAs appear in the stable-versus-prerelease compare set - prefer highlighting the smaller tracked subset over trying to summarize every internal commit in the compare - do not treat prerelease notes alone as sufficient editorial evidence when the release body is empty diff --git a/scripts/github/README.md b/scripts/github/README.md index 55a56e55..968ecf9e 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -5,15 +5,19 @@ This directory owns deterministic GitHub-first Decodex scripts. Current scripts: - `build_change_bundle.py` +- `build_release_delta.py` +- `backfill_release_range.py` - `run_codex_analysis.py` - `sync_latest_signals.py` +- `sync_prerelease_signals.py` - `validate_change_bundle.py` - `render_signal_entry.py` - `validate_signal_entry.py` -Current schema-only contracts: +Current checked contracts: - `analysis_draft.schema.json` +- `release_delta/v1` is validated by `contracts.py` - `upstream_impact.schema.json` - `social_post_draft.schema.json` @@ -41,6 +45,14 @@ python3 scripts/github/validate_signal_entry.py \ site/src/content/signals/openai-codex-pr-15222.json ``` +Prerelease-first flow: + +```bash +python3 scripts/github/sync_prerelease_signals.py \ + --repo openai/codex \ + --max-prs 3 +``` + These scripts stay deterministic on purpose. Local Codex analysis produces the editorial draft JSON consumed by `render_signal_entry.py`. 
Trusted automation may invoke the Codex analysis step as long as `auth.json` is injected into diff --git a/scripts/github/backfill_release_range.py b/scripts/github/backfill_release_range.py index d37e5727..5acfbf4d 100644 --- a/scripts/github/backfill_release_range.py +++ b/scripts/github/backfill_release_range.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Backfill unpublished GitHub signals for a selected stable->preview compare range.""" +"""Backfill unpublished GitHub signals for a stable->preview prerelease compare range.""" from __future__ import annotations @@ -9,6 +9,7 @@ import re import subprocess import sys +import tempfile from pathlib import Path from typing import Any @@ -27,7 +28,10 @@ def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--repo", default="openai/codex", help="GitHub repository in owner/name format.") parser.add_argument("--release-delta", default="site/src/content/release-deltas/openai-codex-latest.json") - parser.add_argument("--stable-tag", required=True, help="Stable tag name to backfill from.") + parser.add_argument( + "--stable-tag", + help="Stable tag name to backfill from. Defaults to the top-level stable release.", + ) parser.add_argument("--preview-tag", help="Preview tag name to backfill to. 
Defaults to the top-level prerelease.") parser.add_argument("--signals-dir", default="site/src/content/signals") parser.add_argument("--bundles-dir", default="artifacts/github/bundles") @@ -37,6 +41,26 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--model", help="Optional Codex model override.") parser.add_argument("--max-prs", type=int, help="Optional limit for debugging or partial runs.") parser.add_argument("--dry-run", action="store_true", help="Print target PRs without generating new content.") + parser.add_argument( + "--refresh-release-delta-first", + action="store_true", + help="Refresh the release-delta artifact before selecting the prerelease compare range.", + ) + parser.add_argument( + "--refresh-stable-limit", + type=int, + help="Stable release limit used only by --refresh-release-delta-first.", + ) + parser.add_argument( + "--refresh-preview-limit", + type=int, + help="Prerelease limit used only by --refresh-release-delta-first.", + ) + parser.add_argument( + "--refresh-pair-limit", + type=int, + help="Compare pair limit used only by --refresh-release-delta-first.", + ) return parser.parse_args() @@ -60,17 +84,22 @@ def published_pr_numbers(signals_dir: Path) -> set[int]: return published -def load_selected_comparison(path: Path, stable_tag: str, preview_tag: str | None) -> tuple[dict[str, Any], str]: +def load_selected_comparison( + path: Path, + stable_tag: str | None, + preview_tag: str | None, +) -> tuple[dict[str, Any], str, str]: payload = load_json(path) validation = validate_release_delta(payload) if not validation.ok: raise SystemExit("Release-delta validation failed:\n- " + "\n- ".join(validation.errors)) + target_stable = stable_tag or payload["stable_release"]["tag_name"] target_preview = preview_tag or payload["prerelease"]["tag_name"] for item in payload.get("comparisons", []): - if item["stable_tag_name"] == stable_tag and item["prerelease_tag_name"] == target_preview: - return item, target_preview - raise 
SystemExit(f"No comparison found for {stable_tag} -> {target_preview}") + if item["stable_tag_name"] == target_stable and item["prerelease_tag_name"] == target_preview: + return item, target_stable, target_preview + raise SystemExit(f"No comparison found for {target_stable} -> {target_preview}") def pr_lookup(repo: str, pr_number: int, token: str | None) -> dict[str, Any]: @@ -91,102 +120,147 @@ def signal_paths(pr_number: int, args: argparse.Namespace) -> tuple[Path, Path, ) +def refresh_release_delta(args: argparse.Namespace) -> None: + command = [ + "build_release_delta.py", + "--repo", + args.repo, + "--signals-dir", + args.signals_dir, + "--out", + args.release_delta, + ] + if args.token_env: + command.extend(["--token-env", args.token_env]) + if args.refresh_stable_limit is not None: + command.extend(["--stable-limit", str(args.refresh_stable_limit)]) + if args.refresh_preview_limit is not None: + command.extend(["--preview-limit", str(args.refresh_preview_limit)]) + if args.refresh_pair_limit is not None: + command.extend(["--pair-limit", str(args.refresh_pair_limit)]) + run_script(*command) + + +def prepare_release_delta_path(args: argparse.Namespace, root: Path) -> tuple[Path, tempfile.TemporaryDirectory[str] | None]: + if not args.refresh_release_delta_first: + return (root / args.release_delta).resolve(), None + + tmpdir = tempfile.TemporaryDirectory(prefix="decodex-prerelease-delta-") + temp_release_delta = Path(tmpdir.name) / "release-delta.json" + refresh_args = argparse.Namespace(**{**vars(args), "release_delta": str(temp_release_delta)}) + refresh_release_delta(refresh_args) + return temp_release_delta.resolve(), tmpdir + + def main() -> None: args = parse_args() root = repo_root() - release_delta_path = (root / args.release_delta).resolve() - comparison, preview_tag = load_selected_comparison(release_delta_path, args.stable_tag, args.preview_tag) - token_env = args.token_env or routed_token_env() or "GITHUB_TOKEN" - token = 
os.environ.get(token_env) - - signals_dir = (root / args.signals_dir).resolve() - published = published_pr_numbers(signals_dir) - target_prs = [int(number) for number in comparison["compare"].get("pr_numbers", []) if int(number) not in published] - - pr_details: list[dict[str, Any]] = [] - for pr_number in target_prs: - details = pr_lookup(args.repo, pr_number, token) - pr_details.append( - { - "number": pr_number, - "title": details.get("title") or f"PR #{pr_number}", - "merged_at": details.get("merged_at") or "", - "url": details.get("html_url") or "", - } + release_delta_path, tmpdir = prepare_release_delta_path(args, root) + try: + comparison, stable_tag, preview_tag = load_selected_comparison( + release_delta_path, + args.stable_tag, + args.preview_tag, ) - pr_details.sort(key=lambda item: item["merged_at"]) - if args.max_prs is not None: - pr_details = pr_details[: args.max_prs] + token_env = args.token_env or routed_token_env() or "GITHUB_TOKEN" + token = os.environ.get(token_env) - if args.dry_run: - print( - json.dumps( + signals_dir = (root / args.signals_dir).resolve() + published = published_pr_numbers(signals_dir) + target_prs = [ + int(number) + for number in comparison["compare"].get("pr_numbers", []) + if int(number) not in published + ] + if args.max_prs is not None: + target_prs = target_prs[: args.max_prs] + + pr_details: list[dict[str, Any]] = [] + for pr_number in target_prs: + details = pr_lookup(args.repo, pr_number, token) + pr_details.append( { - "stable_tag": args.stable_tag, - "preview_tag": preview_tag, - "target_pr_count": len(pr_details), - "target_prs": pr_details, - }, - indent=2, - sort_keys=True, + "number": pr_number, + "title": details.get("title") or f"PR #{pr_number}", + "merged_at": details.get("merged_at") or "", + "url": details.get("html_url") or "", + } ) - ) - return + pr_details.sort(key=lambda item: item["merged_at"]) - created = 0 - for pr in pr_details: - bundle_path, analysis_path, signal_path = 
signal_paths(pr["number"], args) - bundle = build_pr_bundle( - args.repo, - pr["number"], - token, - [f"Backfilled from compare range {args.stable_tag}...{preview_tag}"], - ) - dump_json(root / bundle_path, bundle) + if args.dry_run: + print( + json.dumps( + { + "stable_tag": stable_tag, + "preview_tag": preview_tag, + "target_pr_count": len(pr_details), + "target_prs": pr_details, + }, + indent=2, + sort_keys=True, + ) + ) + return + created = 0 + for pr in pr_details: + bundle_path, analysis_path, signal_path = signal_paths(pr["number"], args) + bundle = build_pr_bundle( + args.repo, + pr["number"], + token, + [f"Backfilled from prerelease compare range {stable_tag}...{preview_tag}"], + ) + dump_json(root / bundle_path, bundle) + + run_script( + "run_codex_analysis.py", + "--bundle", + str(root / bundle_path), + "--out", + str(root / analysis_path), + "--repo-root", + str(root), + "--codex-bin", + args.codex_bin, + *(["--model", args.model] if args.model else []), + ) + run_script( + "render_signal_entry.py", + "--bundle", + str(root / bundle_path), + "--analysis", + str(root / analysis_path), + "--out", + str(root / signal_path), + ) + created += 1 + + run_script("validate_signal_entry.py", str(root / args.signals_dir)) run_script( - "run_codex_analysis.py", - "--bundle", - str(root / bundle_path), - "--out", - str(root / analysis_path), - "--repo-root", - str(root), - "--codex-bin", - args.codex_bin, - *(["--model", args.model] if args.model else []), - ) - run_script( - "render_signal_entry.py", - "--bundle", - str(root / bundle_path), - "--analysis", - str(root / analysis_path), + "build_release_delta.py", + "--repo", + args.repo, + "--signals-dir", + args.signals_dir, "--out", - str(root / signal_path), + args.release_delta, + *(["--token-env", args.token_env] if args.token_env else []), ) - created += 1 - - run_script("validate_signal_entry.py", str(root / args.signals_dir)) - run_script( - "build_release_delta.py", - "--repo", - args.repo, - 
"--signals-dir", - args.signals_dir, - "--out", - "site/src/content/release-deltas/openai-codex-latest.json", - ) - print( - json.dumps( - { - "stable_tag": args.stable_tag, - "preview_tag": preview_tag, - "created": created, - }, - sort_keys=True, + print( + json.dumps( + { + "stable_tag": stable_tag, + "preview_tag": preview_tag, + "created": created, + }, + sort_keys=True, + ) ) - ) + finally: + if tmpdir is not None: + tmpdir.cleanup() if __name__ == "__main__": diff --git a/scripts/github/sync_prerelease_signals.py b/scripts/github/sync_prerelease_signals.py new file mode 100644 index 00000000..de6220e6 --- /dev/null +++ b/scripts/github/sync_prerelease_signals.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +"""Sync unpublished Decodex signals from the latest Codex prerelease compare.""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +from pathlib import Path + +SCRIPT_HOME = Path(__file__).resolve().parent + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo", default="openai/codex", help="GitHub repository in owner/name format.") + parser.add_argument("--release-delta", default="site/src/content/release-deltas/openai-codex-latest.json") + parser.add_argument("--stable-tag", help="Stable tag to compare from. Defaults to the latest stable release.") + parser.add_argument("--preview-tag", help="Prerelease tag to compare to. 
Defaults to the latest prerelease.") + parser.add_argument("--signals-dir", default="site/src/content/signals") + parser.add_argument("--bundles-dir", default="artifacts/github/bundles") + parser.add_argument("--analysis-dir", default="artifacts/github/analysis") + parser.add_argument("--token-env", help="Environment variable containing a GitHub token.") + parser.add_argument("--codex-bin", default="codex", help="Codex executable to invoke.") + parser.add_argument("--model", help="Optional Codex model override.") + parser.add_argument("--max-prs", type=int, help="Optional limit for debugging or partial runs.") + parser.add_argument("--dry-run", action="store_true", help="Print target PRs without generating new content.") + return parser.parse_args() + + +def append_optional(command: list[str], flag: str, value: str | int | None) -> None: + if value is None: + return + command.extend([flag, str(value)]) + + +def main() -> None: + args = parse_args() + command = [ + sys.executable, + str(SCRIPT_HOME / "backfill_release_range.py"), + "--repo", + args.repo, + "--release-delta", + args.release_delta, + "--signals-dir", + args.signals_dir, + "--bundles-dir", + args.bundles_dir, + "--analysis-dir", + args.analysis_dir, + "--codex-bin", + args.codex_bin, + "--refresh-release-delta-first", + "--refresh-stable-limit", + "1", + "--refresh-preview-limit", + "1", + "--refresh-pair-limit", + "1", + ] + append_optional(command, "--stable-tag", args.stable_tag) + append_optional(command, "--preview-tag", args.preview_tag) + append_optional(command, "--token-env", args.token_env) + append_optional(command, "--model", args.model) + append_optional(command, "--max-prs", args.max_prs) + if args.dry_run: + command.append("--dry-run") + + completed = subprocess.run(command, check=False) + raise SystemExit(completed.returncode) + + +if __name__ == "__main__": + main() diff --git a/scripts/github/test_backfill_release_range.py b/scripts/github/test_backfill_release_range.py new file mode 
100644 index 00000000..7e606cb1 --- /dev/null +++ b/scripts/github/test_backfill_release_range.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + +MODULE_PATH = Path(__file__).resolve().with_name("backfill_release_range.py") +MODULE_SPEC = importlib.util.spec_from_file_location("backfill_release_range", MODULE_PATH) +if MODULE_SPEC is None or MODULE_SPEC.loader is None: + raise RuntimeError(f"Unable to load {MODULE_PATH}") +backfill_release_range = importlib.util.module_from_spec(MODULE_SPEC) +MODULE_SPEC.loader.exec_module(backfill_release_range) + + +def release(tag_name: str, prerelease: bool, published_at: str) -> dict[str, object]: + return { + "tag_name": tag_name, + "name": tag_name, + "prerelease": prerelease, + "published_at": published_at, + "url": f"https://github.com/openai/codex/releases/tag/{tag_name}", + } + + +def compare(stable_tag: str, preview_tag: str, pr_numbers: list[int]) -> dict[str, object]: + return { + "stable_tag_name": stable_tag, + "prerelease_tag_name": preview_tag, + "compare": { + "status": "ahead", + "ahead_by": len(pr_numbers), + "total_commits": len(pr_numbers), + "url": f"https://github.com/openai/codex/compare/{stable_tag}...{preview_tag}", + "commit_shas": [f"deadbeef{number}" for number in pr_numbers], + "pr_numbers": pr_numbers, + }, + "tracked_signal_slugs": [], + } + + +class LoadSelectedComparisonTests(unittest.TestCase): + def write_release_delta(self, path: Path) -> None: + payload = { + "schema": "release_delta/v1", + "repo": "openai/codex", + "tag_prefix": "rust-v", + "generated_at": "2026-05-13T00:00:00Z", + "stable_release": release("rust-v0.130.0", False, "2026-05-01T00:00:00Z"), + "prerelease": release("rust-v0.131.0-alpha.9", True, "2026-05-12T00:00:00Z"), + "compare": compare("rust-v0.130.0", "rust-v0.131.0-alpha.9", [22404])["compare"], + "release_options": { + "stable": [ + release("rust-v0.130.0", False, 
"2026-05-01T00:00:00Z"), + release("rust-v0.129.0", False, "2026-04-20T00:00:00Z"), + ], + "preview": [ + release("rust-v0.131.0-alpha.9", True, "2026-05-12T00:00:00Z"), + release("rust-v0.131.0-alpha.8", True, "2026-05-11T00:00:00Z"), + ], + }, + "comparisons": [ + compare("rust-v0.130.0", "rust-v0.131.0-alpha.9", [22404]), + compare("rust-v0.129.0", "rust-v0.131.0-alpha.8", [22397]), + ], + "tracked_signal_slugs": [], + } + path.write_text(json.dumps(payload), encoding="utf-8") + + def test_defaults_to_top_level_stable_and_prerelease(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + path = Path(tmpdir) / "release-delta.json" + self.write_release_delta(path) + + comparison, stable_tag, preview_tag = backfill_release_range.load_selected_comparison( + path, + None, + None, + ) + + self.assertEqual(stable_tag, "rust-v0.130.0") + self.assertEqual(preview_tag, "rust-v0.131.0-alpha.9") + self.assertEqual(comparison["compare"]["pr_numbers"], [22404]) + + def test_can_select_explicit_stable_and_prerelease(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + path = Path(tmpdir) / "release-delta.json" + self.write_release_delta(path) + + comparison, stable_tag, preview_tag = backfill_release_range.load_selected_comparison( + path, + "rust-v0.129.0", + "rust-v0.131.0-alpha.8", + ) + + self.assertEqual(stable_tag, "rust-v0.129.0") + self.assertEqual(preview_tag, "rust-v0.131.0-alpha.8") + self.assertEqual(comparison["compare"]["pr_numbers"], [22397]) + + +if __name__ == "__main__": + unittest.main() From 80c329bfe5a95d38d10dab09770ac34eea6670ed Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 14:04:20 +0800 Subject: [PATCH 4/6] {"schema":"decodex/commit/1","summary":"Use continuous Codex commit radar","authority":"manual"} --- README.md | 9 +- dev/skills/README.md | 12 +- dev/skills/codex-release-analysis/SKILL.md | 39 +++--- dev/skills/codex-upstream-triage/SKILL.md | 25 ++-- dev/skills/x-post-draft/SKILL.md | 7 +- 
docs/reference/workspace-layout.md | 12 +- .../2026-05-13_codex-radar-skill-split.json | 2 +- docs/runbook/local-github-signal-workflow.md | 54 ++++----- docs/runbook/social-publishing-workflow.md | 10 +- docs/spec/social-post-draft.md | 6 +- scripts/github/README.md | 16 ++- scripts/github/backfill_release_range.py | 4 +- scripts/github/social_post_draft.schema.json | 9 +- scripts/github/sync_latest_signals.py | 114 +++++++++++++----- scripts/github/sync_prerelease_signals.py | 75 ------------ 15 files changed, 203 insertions(+), 191 deletions(-) delete mode 100644 scripts/github/sync_prerelease_signals.py diff --git a/README.md b/README.md index 1df7ab12..27ec293b 100644 --- a/README.md +++ b/README.md @@ -158,11 +158,10 @@ The GitHub-first public signal path stays deterministic and reviewable: `artifacts/github/bundles/`. - `dev/skills/README.md` routes the repo-local Radar and editorial instructions. They are not part of the installable Decodex plugin distribution. -- `scripts/github/sync_latest_signals.py` discovers recent merged PRs and refreshes - content artifacts. -- `scripts/github/sync_prerelease_signals.py` starts from the latest stable-to-prerelease - compare so Decodex can explain Codex prereleases even when upstream release notes are - sparse. +- `scripts/github/sync_latest_signals.py` discovers recent upstream commits, resolves + them back to PRs when possible, and refreshes content artifacts. +- `scripts/github/backfill_release_range.py` fills release-window gaps before a + release or prerelease summary, but daily Radar still starts from the commit stream. - `docs/spec/upstream-impact.md` records how upstream Codex changes are classified for public signals and Control Plane follow-up work. 
- `scripts/github/render_signal_entry.py` renders reviewed analysis drafts into site diff --git a/dev/skills/README.md b/dev/skills/README.md index ad774dfc..e8c1c48d 100644 --- a/dev/skills/README.md +++ b/dev/skills/README.md @@ -25,9 +25,15 @@ follow-up work: Use only the skills needed for the current artifact. Do not create a social draft just because a signal exists. -For OpenAI Codex prereleases, start with `codex-release-analysis` and the latest -stable-to-prerelease compare instead of a generic recent-commit scan. Codex prerelease -notes are often too sparse to explain what changed. +Default posture: track every upstream Codex commit as a possible evidence unit. Resolve +commits back to PRs when possible, decide whether the change matters to Decodex Control +Plane or the wider Codex community, and only then promote important, useful, or +deprecated behavior into a signal, upstream-impact artifact, follow-up issue, or X +draft. + +For upstream releases and prereleases, use `codex-release-analysis` as a rollup over +the accumulated commit/PR analysis. Codex prerelease notes are often too sparse to +explain what changed by themselves. Only the existing checked-in contracts are durable artifacts today: `github_change_bundle/v1`, `analysis_draft`, `signal_entry/v1`, `upstream_impact/v1`, diff --git a/dev/skills/codex-release-analysis/SKILL.md b/dev/skills/codex-release-analysis/SKILL.md index 67477341..367f62a1 100644 --- a/dev/skills/codex-release-analysis/SKILL.md +++ b/dev/skills/codex-release-analysis/SKILL.md @@ -33,8 +33,8 @@ generation. ## Release Reading Rules -- Treat Codex prereleases as a primary Radar source because their release bodies may be - empty or title-only. +- Treat release and prerelease tags as reporting checkpoints over the commit/PR stream, + not as a separate higher-priority intake lane. - Treat release notes as discovery, not proof, when they are sparse. 
- Use GitHub compare data and PR mappings to explain what changed between stable and prerelease tags. @@ -42,28 +42,26 @@ generation. or PR evidence. - Do not imply a feature is broadly available when the source says alpha, beta, rollout, platform-gated, or config-gated. -- Do not write a release recap that only duplicates a release bot unless there is no - deeper evidence-backed angle. +- Do not write a release recap that only duplicates a release bot. Prefer a summary + built from accumulated Decodex signal, upstream-impact, and commit/PR analysis. -## Codex Prerelease-First Path +## Release Rollup Path -When the target is an OpenAI Codex prerelease: +When the target is an OpenAI Codex release or prerelease: 1. Refresh or read `release_delta/v1`. 2. Select the top-level `stable_release` -> `prerelease` comparison unless the user asks for a specific tag pair. -3. Use `compare.pr_numbers` and `compare.commit_shas` as the discovery queue. -4. Remove PRs that already have published `signal_entry/v1` coverage. -5. Prioritize the remaining PRs by Radar triggers: app-server/protocol, plugins, MCP, - browser/Chrome, tool search, hooks, permissions, sandboxing, config, auth, - providers, and visible CLI/TUI behavior. -6. Build PR-first bundles for the selected unpublished PRs and run - `codex-code-analysis` before `github-signal`. +3. Start from existing `signal_entry/v1`, `upstream_impact/v1`, and recent + commit/PR analyses that match the compare range. +4. Use `compare.pr_numbers` and `compare.commit_shas` to find gaps that still need + code analysis. +5. Group findings by reader value: useful now, important for Decodex Control Plane, + deprecated/removed behavior, and watch-only changes. +6. Draft release or prerelease X reporting only after the summary is grounded in those + historical analyses. 7. Refresh `release_delta/v1` after new signals are rendered so the homepage can map - prerelease deltas to the new tracked signals. 
- -Use `scripts/github/sync_prerelease_signals.py` for the default latest-prerelease -automation path. + the release window to tracked signals. ## Analysis Modes @@ -76,8 +74,9 @@ Use exactly one primary mode: | `operator_impact` | Release changes app-server, plugins, browser, MCP, permissions, sandbox, hooks, config, auth, or providers. | `upstream_impact/v1` plus possible follow-up issue. | | `watch_note` | The release is interesting but evidence is incomplete. | Watch note with caveats. | -For sparse Codex prereleases, prefer `delta_explainer` or `operator_impact` over -`release_pulse`; the release version alone is rarely the useful story. +For sparse Codex prereleases, prefer `delta_explainer`, `operator_impact`, or a +source-backed release rollup over `release_pulse`; the release version alone is rarely +the useful story. ## Style Lessons @@ -98,7 +97,7 @@ Return: - user-facing takeaway - Control Plane impact, if any - Publisher recommendation: no post, `release_pulse`, `practical_explainer`, - `operator_impact`, or `watch_note` + `operator_impact`, `release_rollup`, or `watch_note` Promote durable conclusions into existing artifacts only: `upstream_impact/v1`, `analysis_draft` plus rendered `signal_entry/v1`, refreshed `release_delta/v1`, or diff --git a/dev/skills/codex-upstream-triage/SKILL.md b/dev/skills/codex-upstream-triage/SKILL.md index 537d74be..f74cd920 100644 --- a/dev/skills/codex-upstream-triage/SKILL.md +++ b/dev/skills/codex-upstream-triage/SKILL.md @@ -29,20 +29,20 @@ Decodex plugin skill. Use the lightest source that can answer the triage question: -1. GitHub release-delta or compare metadata when the user asks about a Codex release - or prerelease. -2. GitHub PR metadata when a PR number is known. -3. GitHub commit metadata when only a SHA is known. +1. GitHub commit metadata for the upstream commit stream. +2. GitHub PR metadata when a commit maps to a merged PR or a PR number is known. +3. 
GitHub release-delta or compare metadata when the user asks for a release or + prerelease rollup. 4. Upstream changelog or browser observation when the question is about public product framing. -For a latest Codex pass, prefer the latest stable-to-prerelease compare before a -generic recent-commit scan. Prerelease bodies are often sparse, so the compare PR and -commit set is the useful queue. +For normal Radar operation, scan recent upstream commits first and resolve each commit +back to a PR when possible. A commit list is a queue for understanding and +classification, not final evidence. -For a latest-commit pass outside a release window, list recent upstream commits first, -then resolve promising commits back to PRs before building bundles. A commit list is a -queue, not final evidence. +For release or prerelease work, compare metadata is a rollup index over the commit/PR +history already being analyzed. Do not let a release tag displace the underlying commit +and PR evidence. ## Candidate Ladder @@ -54,7 +54,6 @@ Classify each item as exactly one: | `watch` | Interesting but too weak, too hidden, or too broad. | Optional `upstream_impact/v1` with `control_plane_impact = "watch"`. | | `bundle` | Enough GitHub context exists for code analysis. | Build or reuse a `github_change_bundle/v1`. | | `release_review` | Release or changelog framing needs comparison against commits and signals. | Use `codex-release-analysis`. | -| `prerelease_delta` | A sparse Codex prerelease needs compare-driven reconstruction. | Use `codex-release-analysis`, then build bundles for unpublished compare PRs. | | `style_reference` | Useful only as style or audience evidence. | Save no technical artifact; use only as optional style context when a separate source-backed draft exists. 
| ## Grouping Rules @@ -78,8 +77,8 @@ Escalate to `codex-code-analysis` when changed files or release text mention: - CLI/TUI behavior visible to a normal Codex user Escalate to `codex-release-analysis` when the source is a release, prerelease, app -update, or public changelog. For Codex prereleases, default to `prerelease_delta` -unless the release body is already explanatory enough to stand on its own. +update, or public changelog. For Codex releases and prereleases, summarize from prior +commit/PR analysis whenever possible, then use compare data to find gaps. Escalate to `x-post-draft` only after there is technical source evidence and a clear Publisher angle. Style references from X must not start a social draft by themselves. diff --git a/dev/skills/x-post-draft/SKILL.md b/dev/skills/x-post-draft/SKILL.md index 76b12fd9..4508aa8a 100644 --- a/dev/skills/x-post-draft/SKILL.md +++ b/dev/skills/x-post-draft/SKILL.md @@ -42,6 +42,7 @@ Use these as format patterns only: | Pattern | Good for | Decodex adaptation | | --- | --- | --- | | Release-bot bullet | Fast `release_pulse` posts. | Version or source headline, two or three evidence-backed bullets, source link. | +| Release rollup | `release_rollup` posts after a release or prerelease. | Summarize what prior commit/PR analysis found: useful now, Control Plane impact, deprecations, and watch-only gaps. | | Human workflow read | `practical_explainer` and `operator_impact`. | Start with the concrete workflow change, then explain why it matters and what caveat remains. | | Watch note | Interesting but incomplete evidence. | Say what changed, why Radar is watching, and what evidence is still missing. | @@ -50,13 +51,17 @@ Use these as format patterns only: Choose exactly one `mode` from `social_post_draft/v1`: - `release_pulse`: short release-aware summary with source link. +- `release_rollup`: release or prerelease summary built from accumulated Radar + analysis. 
- `practical_explainer`: concrete user workflow and expected result. - `operator_impact`: Decodex Control Plane implication. - `thread`: multi-post explanation when one post hides evidence or caveats. - `watch_note`: cautious public note for incomplete evidence. `@decodexspace` should mostly use `practical_explainer` and `operator_impact`. -Use `release_pulse` only when the release itself is the useful alert. +Use `release_pulse` only when the release itself is the useful alert. Use +`release_rollup` when the release or prerelease is best explained by historical +commit/PR analysis rather than by upstream release notes. ## Claim Review diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index c8c893ca..eaad5b04 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -77,12 +77,12 @@ Those runtime and operator surfaces stay in `apps/decodex/` and `docs/spec/`. `scripts/github/` owns deterministic content scripts. Its automated Codex step may apply the repo-local code-analysis and GitHub-signal instructions under `dev/skills/` to produce the existing `analysis_draft` JSON consumed by `render_signal_entry.py`. -`sync_prerelease_signals.py` is the prerelease-first entrypoint: it refreshes the -stable-to-prerelease release delta, selects unpublished compare PRs, and reuses the -existing bundle, analysis-draft, render, and validation path. The broader upstream -triage, release-analysis, and X-drafting skills remain manual Radar/Publisher -reasoning surfaces unless a script explicitly wires them into a checked-in contract. -Generated GitHub bundles and analysis drafts live under +`sync_latest_signals.py` is the continuous Radar entrypoint: it scans recent upstream +commits, resolves them back to PRs when possible, and reuses the existing bundle, +analysis-draft, render, and validation path. `backfill_release_range.py` fills gaps for +release-window summaries. 
The broader upstream triage, release-analysis, and X-drafting +skills remain manual Radar/Publisher reasoning surfaces unless a script explicitly +wires them into a checked-in contract. Generated GitHub bundles and analysis drafts live under `artifacts/github/` and must stay explicit and checked into the repository. `artifacts/github/impact/` may hold `upstream_impact/v1` classifications when an diff --git a/docs/research/2026-05-13_codex-radar-skill-split.json b/docs/research/2026-05-13_codex-radar-skill-split.json index 4fcfd697..ed0ce265 100644 --- a/docs/research/2026-05-13_codex-radar-skill-split.json +++ b/docs/research/2026-05-13_codex-radar-skill-split.json @@ -114,7 +114,7 @@ { "id": "E9", "kind": "observation", - "summary": "The current automation path discovers recent merged PRs with title scoring, builds a GitHub bundle, runs Codex to produce an analysis_draft JSON, renders a signal_entry/v1, validates signal entries, and refreshes release_delta/v1.", + "summary": "The current automation path discovers recent upstream commits, resolves them back to PRs when possible, builds a GitHub bundle, runs Codex to produce an analysis_draft JSON, renders a signal_entry/v1, validates signal entries, and refreshes release_delta/v1.", "source_family": "decodex_repo", "source_path": "scripts/github/sync_latest_signals.py; scripts/github/run_codex_analysis.py; scripts/github/render_signal_entry.py" }, diff --git a/docs/runbook/local-github-signal-workflow.md b/docs/runbook/local-github-signal-workflow.md index 6c897846..407e51a3 100644 --- a/docs/runbook/local-github-signal-workflow.md +++ b/docs/runbook/local-github-signal-workflow.md @@ -28,12 +28,10 @@ Outputs: ## Workflow -1. For OpenAI Codex prereleases, start from the prerelease-first path because upstream - prerelease notes are often sparse: refresh `release_delta/v1`, compare the latest - stable tag to the latest prerelease tag, and use unpublished compare PRs as the - candidate queue. -2. 
Triage other upstream Codex activity with `dev/skills/codex-upstream-triage/` when - the candidate is not already chosen by automation or by the operator. +1. Track upstream Codex commits continuously. Treat each commit as a candidate to + understand, then resolve it back to a PR when possible. +2. Triage upstream activity with `dev/skills/codex-upstream-triage/` when the + candidate is not already chosen by automation or by the operator. 3. Build a normalized GitHub change bundle under `artifacts/github/bundles/` for selected candidates. 4. Analyze source behavior with `dev/skills/codex-code-analysis/` as an in-session @@ -45,11 +43,14 @@ Outputs: 7. Render the resulting signal entry into `site/src/content/signals/`. 8. Validate the signal entry shape and collection consistency. 9. Classify upstream impact when the change may affect Control Plane or Publisher. -10. Regenerate the release-delta artifact so the homepage compares the latest stable release to the latest prerelease using the updated signal set. +10. Regenerate the release-delta artifact so the homepage compares release windows + using the updated signal set. 11. Draft optional social publishing content only through [`social-publishing-workflow.md`](./social-publishing-workflow.md). -12. Review the rendered content manually in the homepage feed. -13. Push the content update and let CI build and deploy the static site. +12. When upstream publishes a release or prerelease, use `codex-release-analysis` to + roll up the accumulated commit/PR analysis into a release summary or X draft. +13. Review the rendered content manually in the homepage feed. +14. Push the content update and let CI build and deploy the static site. 
## Deterministic commands @@ -97,25 +98,20 @@ python3 scripts/github/build_release_delta.py \ --out site/src/content/release-deltas/openai-codex-latest.json ``` -Sync unpublished signals from the latest prerelease compare: +Preview unpublished PRs from a selected release compare range without generating +content: ```bash -python3 scripts/github/sync_prerelease_signals.py \ - --repo openai/codex \ - --max-prs 3 -``` - -Preview the latest prerelease queue without generating content: - -```bash -python3 scripts/github/sync_prerelease_signals.py \ +python3 scripts/github/backfill_release_range.py \ --repo openai/codex \ + --stable-tag rust-v0.130.0 \ + --preview-tag rust-v0.131.0-alpha.9 \ --dry-run ``` -`sync_prerelease_signals.py --dry-run` refreshes the prerelease compare into a -temporary release-delta file, so it does not mutate checked-in content while listing -the queue. +Use release-range backfill to fill gaps in the accumulated commit/PR analysis before a +release or prerelease summary. It should supplement continuous commit tracking, not +replace it. The repository already includes a real sample for this flow: @@ -133,14 +129,10 @@ plugin. Today only `github_change_bundle/v1`, `analysis_draft`, `signal_entry/v1 `upstream_impact/v1`, `release_delta/v1`, and `social_post_draft/v1` are durable content contracts for this workflow. 
-Automated hourly sync entrypoint: +Automated sync entrypoint: - `scripts/github/sync_latest_signals.py` -Prerelease-first sync entrypoint: - -- `scripts/github/sync_prerelease_signals.py` - ## Editorial gate Publish only when the change meets at least one of these tests: @@ -157,10 +149,11 @@ Skip or defer entries for: For the release-delta artifact: -- treat Codex prereleases as a high-value source even when release notes are empty - include only signals whose source commit SHAs appear in the stable-versus-prerelease compare set - prefer highlighting the smaller tracked subset over trying to summarize every internal commit in the compare - do not treat prerelease notes alone as sufficient editorial evidence when the release body is empty +- use release and prerelease publication time as a summary checkpoint over accumulated + commit/PR analysis, not as the primary source of truth For upstream-impact and social-draft artifacts: @@ -173,8 +166,9 @@ For upstream-impact and social-draft artifacts: The current Decodex boundary is: - local Codex run: manual editorial review, batch backfills, and prompt iteration -- deterministic scripts: bundle fetch, Codex analysis execution, render, and validation -- trusted CI runner: hourly refresh of recent merged PRs plus normal site validation and commit/push of changed content +- deterministic scripts: commit/PR discovery, bundle fetch, Codex analysis execution, + render, and validation +- trusted CI runner: refresh of recent upstream commits plus normal site validation and commit/push of changed content The hourly GitHub Actions path assumes: diff --git a/docs/runbook/social-publishing-workflow.md b/docs/runbook/social-publishing-workflow.md index 262a2c11..ed3154fe 100644 --- a/docs/runbook/social-publishing-workflow.md +++ b/docs/runbook/social-publishing-workflow.md @@ -57,7 +57,7 @@ for technical claims. 3. Decide whether to draft a post. 
- Draft when the change has a clear `release_pulse`, `practical_explainer`, - `operator_impact`, or `watch_note` angle. + `release_rollup`, `operator_impact`, or `watch_note` angle. - Skip when the change is internal cleanup, too weakly sourced, too private, or too vague for a useful reader takeaway. @@ -92,6 +92,14 @@ Use `release_pulse` when: - the post is mainly fast awareness - the change does not yet justify a deeper Decodex angle +Use `release_rollup` when: + +- upstream publishes a release or prerelease +- Decodex already has commit/PR analysis, signals, or upstream-impact notes in that + release window +- the post should summarize useful changes, Control Plane implications, deprecations, + and watch-only gaps without pretending upstream release notes contain that detail + Use `practical_explainer` when: - a reader can try the change in one short session diff --git a/docs/spec/social-post-draft.md b/docs/spec/social-post-draft.md index ba52391d..1ea16598 100644 --- a/docs/spec/social-post-draft.md +++ b/docs/spec/social-post-draft.md @@ -62,14 +62,16 @@ Use exactly one `mode` value: | Value | Purpose | | --- | --- | | `release_pulse` | Short release-aware summary with a source link. | +| `release_rollup` | Release or prerelease summary built from accumulated signal, upstream-impact, and commit/PR analysis. | | `practical_explainer` | Concrete user-facing explanation of how to try or reason about a feature. | | `operator_impact` | Decodex-specific explanation of app-server, plugin, browser, MCP, sandbox, config, or orchestration implications. | | `thread` | Multi-post explanation when one post would hide important evidence or caveats. | | `watch_note` | Cautious note for interesting changes that are not ready for a strong recommendation. | `release_pulse` should be the minority path for `@decodexspace`; the account should -differ from release-only bots by preferring `practical_explainer` and `operator_impact` -drafts when evidence supports them. 
+differ from release-only bots by preferring `practical_explainer`, +`operator_impact`, and evidence-backed `release_rollup` drafts when evidence supports +them. ## Claim rules diff --git a/scripts/github/README.md b/scripts/github/README.md index 968ecf9e..1b505035 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -9,7 +9,6 @@ Current scripts: - `backfill_release_range.py` - `run_codex_analysis.py` - `sync_latest_signals.py` -- `sync_prerelease_signals.py` - `validate_change_bundle.py` - `render_signal_entry.py` - `validate_signal_entry.py` @@ -45,11 +44,22 @@ python3 scripts/github/validate_signal_entry.py \ site/src/content/signals/openai-codex-pr-15222.json ``` -Prerelease-first flow: +Continuous commit sync: ```bash -python3 scripts/github/sync_prerelease_signals.py \ +python3 scripts/github/sync_latest_signals.py \ --repo openai/codex \ + --search-limit 20 \ + --max-new-prs 3 +``` + +Release-window gap fill: + +```bash +python3 scripts/github/backfill_release_range.py \ + --repo openai/codex \ + --stable-tag rust-v0.130.0 \ + --preview-tag rust-v0.131.0-alpha.9 \ --max-prs 3 ``` diff --git a/scripts/github/backfill_release_range.py b/scripts/github/backfill_release_range.py index 5acfbf4d..a9deaad3 100644 --- a/scripts/github/backfill_release_range.py +++ b/scripts/github/backfill_release_range.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Backfill unpublished GitHub signals for a stable->preview prerelease compare range.""" +"""Backfill unpublished GitHub signals for a selected release compare range.""" from __future__ import annotations @@ -210,7 +210,7 @@ def main() -> None: args.repo, pr["number"], token, - [f"Backfilled from prerelease compare range {stable_tag}...{preview_tag}"], + [f"Backfilled from release compare range {stable_tag}...{preview_tag}"], ) dump_json(root / bundle_path, bundle) diff --git a/scripts/github/social_post_draft.schema.json b/scripts/github/social_post_draft.schema.json index a04c7c26..0ec4aee9 100644 --- 
a/scripts/github/social_post_draft.schema.json +++ b/scripts/github/social_post_draft.schema.json @@ -34,7 +34,14 @@ }, "mode": { "type": "string", - "enum": ["release_pulse", "practical_explainer", "operator_impact", "thread", "watch_note"] + "enum": [ + "release_pulse", + "release_rollup", + "practical_explainer", + "operator_impact", + "thread", + "watch_note" + ] }, "status": { "type": "string", diff --git a/scripts/github/sync_latest_signals.py b/scripts/github/sync_latest_signals.py index 6f37cf20..fe0167fb 100644 --- a/scripts/github/sync_latest_signals.py +++ b/scripts/github/sync_latest_signals.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Discover recent merged PRs, generate Decodex signals, and refresh release deltas.""" +"""Discover recent upstream commits, generate Decodex signals, and refresh release deltas.""" from __future__ import annotations @@ -17,9 +17,17 @@ if str(SCRIPT_HOME) not in sys.path: sys.path.insert(0, str(SCRIPT_HOME)) -from build_change_bundle import build_pr_bundle, github_request, repo_default_branch, routed_token_env # noqa: E402 +from build_change_bundle import ( # noqa: E402 + build_commit_bundle, + build_pr_bundle, + github_request, + maybe_promote_commit_to_pr, + repo_default_branch, + routed_token_env, +) from contracts import dump_json, load_json, validate_signal # noqa: E402 +COMMIT_URL_RE = re.compile(r"/commit/([0-9a-f]{7,40})$") PR_URL_RE = re.compile(r"/pull/(\d+)$") POSITIVE_TITLE_TERMS = ( "feat", @@ -72,8 +80,8 @@ def parse_args() -> argparse.Namespace: default="site/src/content/release-deltas/openai-codex-latest.json", help="Path to write the release delta artifact.", ) - parser.add_argument("--search-limit", type=int, default=20, help="How many recent merged PRs to inspect.") - parser.add_argument("--max-new-prs", type=int, default=3, help="Maximum unpublished PRs to publish per run.") + parser.add_argument("--search-limit", type=int, default=20, help="How many recent commits to inspect.") + 
parser.add_argument("--max-new-prs", type=int, default=3, help="Maximum unpublished changes to publish per run.") parser.add_argument("--token-env", help="Environment variable containing a GitHub token.") parser.add_argument("--codex-bin", default="codex", help="Codex executable to invoke.") parser.add_argument("--model", help="Optional Codex model override.") @@ -105,23 +113,48 @@ def published_pr_numbers(signals_dir: Path) -> set[int]: return published -def recent_merged_prs(repo: str, token: str | None, search_limit: int) -> list[dict[str, Any]]: +def published_commit_shas(signals_dir: Path) -> set[str]: + published: set[str] = set() + for path in sorted(signals_dir.glob("*.json")): + payload = load_json(path) + validation = validate_signal(payload) + if not validation.ok: + raise SystemExit(f"Signal validation failed for {path}:\n- " + "\n- ".join(validation.errors)) + for url in payload.get("source_refs", {}).get("commit_urls", []): + if not isinstance(url, str): + continue + match = COMMIT_URL_RE.search(url) + if match: + published.add(match.group(1)) + return published + + +def recent_commits(repo: str, token: str | None, search_limit: int) -> list[dict[str, Any]]: default_branch = repo_default_branch(repo, token) - query = urllib.parse.quote_plus(f"repo:{repo} is:pr is:merged base:{default_branch}") payload, _ = github_request( - f"https://api.github.com/search/issues?q={query}&sort=updated&order=desc&per_page={search_limit}", + f"https://api.github.com/repos/{repo}/commits?sha={urllib.parse.quote(default_branch)}&per_page={search_limit}", token, ) - items = payload.get("items") - if not isinstance(items, list): - raise SystemExit("Expected search/issues to return an items list") + if not isinstance(payload, list): + raise SystemExit("Expected commits list payload from GitHub API") results: list[dict[str, Any]] = [] - for item in items: - number = item.get("number") - title = item.get("title") + for item in payload: + sha = item.get("sha") + commit = 
item.get("commit") url = item.get("html_url") - if isinstance(number, int) and isinstance(title, str) and isinstance(url, str): - results.append({"number": number, "title": title, "url": url}) + if not isinstance(sha, str) or not isinstance(commit, dict) or not isinstance(url, str): + continue + message = commit.get("message") + if not isinstance(message, str) or not message: + continue + results.append( + { + "sha": sha, + "title": message.strip().splitlines()[0], + "url": url, + "committed_at": (commit.get("committer") or {}).get("date"), + } + ) return results @@ -137,8 +170,13 @@ def candidate_score(title: str) -> int: return score -def signal_paths(pr_number: int, args: argparse.Namespace) -> tuple[Path, Path, Path]: - stem = f"openai-codex-pr-{pr_number}" +def signal_paths(candidate: dict[str, Any], args: argparse.Namespace) -> tuple[Path, Path, Path]: + pr_number = candidate.get("pr_number") + stem = ( + f"openai-codex-pr-{pr_number}" + if isinstance(pr_number, int) + else f"openai-codex-commit-{candidate['sha'][:12]}" + ) bundles_dir = Path(args.bundles_dir) analysis_dir = Path(args.analysis_dir) signals_dir = Path(args.signals_dir) @@ -177,19 +215,37 @@ def main() -> None: token = os.environ.get(token_env) root = repo_root() signals_dir = (root / args.signals_dir).resolve() - published = published_pr_numbers(signals_dir) - candidates = recent_merged_prs(args.repo, token, args.search_limit) - unpublished = [item for item in candidates if item["number"] not in published] + published_prs = published_pr_numbers(signals_dir) + published_shas = published_commit_shas(signals_dir) + commits = recent_commits(args.repo, token, args.search_limit) + candidates: list[dict[str, Any]] = [] + seen_candidate_keys: set[tuple[str, int | str]] = set() + for commit in commits: + if commit["sha"] in published_shas: + continue + pr_number = maybe_promote_commit_to_pr(args.repo, commit["sha"], token) + if pr_number is not None and pr_number in published_prs: + continue + 
candidate_key: tuple[str, int | str] = ( + ("pr", pr_number) if pr_number is not None else ("commit", commit["sha"]) + ) + if candidate_key in seen_candidate_keys: + continue + seen_candidate_keys.add(candidate_key) + candidates.append({**commit, "pr_number": pr_number}) + + unpublished = candidates unpublished = [item for item in unpublished if candidate_score(item["title"]) > 0][: args.max_new_prs] created = 0 for candidate in reversed(unpublished): - bundle_path, analysis_path, signal_path = signal_paths(candidate["number"], args) - bundle = build_pr_bundle( - args.repo, - candidate["number"], - token, - [f"Discovered via hourly merged-PR sync: {candidate['url']}"], + bundle_path, analysis_path, signal_path = signal_paths(candidate, args) + notes = [f"Discovered via continuous upstream commit sync: {candidate['url']}"] + pr_number = candidate.get("pr_number") + bundle = ( + build_pr_bundle(args.repo, pr_number, token, notes) + if isinstance(pr_number, int) + else build_commit_bundle(args.repo, candidate["sha"], token, notes) ) dump_json(root / bundle_path, bundle) @@ -227,8 +283,10 @@ def main() -> None: json.dumps( { "repo": args.repo, - "published_prs_seen": len(published), - "recent_prs_scanned": len(candidates), + "published_prs_seen": len(published_prs), + "published_commits_seen": len(published_shas), + "recent_commits_scanned": len(commits), + "unpublished_changes_considered": len(candidates), "new_signals_created": created, "release_delta_refreshed": release_delta_refreshed, }, diff --git a/scripts/github/sync_prerelease_signals.py b/scripts/github/sync_prerelease_signals.py deleted file mode 100644 index de6220e6..00000000 --- a/scripts/github/sync_prerelease_signals.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -"""Sync unpublished Decodex signals from the latest Codex prerelease compare.""" - -from __future__ import annotations - -import argparse -import subprocess -import sys -from pathlib import Path - -SCRIPT_HOME = 
Path(__file__).resolve().parent - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--repo", default="openai/codex", help="GitHub repository in owner/name format.") - parser.add_argument("--release-delta", default="site/src/content/release-deltas/openai-codex-latest.json") - parser.add_argument("--stable-tag", help="Stable tag to compare from. Defaults to the latest stable release.") - parser.add_argument("--preview-tag", help="Prerelease tag to compare to. Defaults to the latest prerelease.") - parser.add_argument("--signals-dir", default="site/src/content/signals") - parser.add_argument("--bundles-dir", default="artifacts/github/bundles") - parser.add_argument("--analysis-dir", default="artifacts/github/analysis") - parser.add_argument("--token-env", help="Environment variable containing a GitHub token.") - parser.add_argument("--codex-bin", default="codex", help="Codex executable to invoke.") - parser.add_argument("--model", help="Optional Codex model override.") - parser.add_argument("--max-prs", type=int, help="Optional limit for debugging or partial runs.") - parser.add_argument("--dry-run", action="store_true", help="Print target PRs without generating new content.") - return parser.parse_args() - - -def append_optional(command: list[str], flag: str, value: str | int | None) -> None: - if value is None: - return - command.extend([flag, str(value)]) - - -def main() -> None: - args = parse_args() - command = [ - sys.executable, - str(SCRIPT_HOME / "backfill_release_range.py"), - "--repo", - args.repo, - "--release-delta", - args.release_delta, - "--signals-dir", - args.signals_dir, - "--bundles-dir", - args.bundles_dir, - "--analysis-dir", - args.analysis_dir, - "--codex-bin", - args.codex_bin, - "--refresh-release-delta-first", - "--refresh-stable-limit", - "1", - "--refresh-preview-limit", - "1", - "--refresh-pair-limit", - "1", - ] - append_optional(command, "--stable-tag", 
args.stable_tag) - append_optional(command, "--preview-tag", args.preview_tag) - append_optional(command, "--token-env", args.token_env) - append_optional(command, "--model", args.model) - append_optional(command, "--max-prs", args.max_prs) - if args.dry_run: - command.append("--dry-run") - - completed = subprocess.run(command, check=False) - raise SystemExit(completed.returncode) - - -if __name__ == "__main__": - main() From f76def97ebfd3f6f2abd56462ae917c19c747bea Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 14:53:49 +0800 Subject: [PATCH 5/6] {"schema":"decodex/commit/1","summary":"Add Radar artifact archive retention policy","authority":"manual"} --- README.md | 2 + artifacts/archive/README.md | 8 ++ artifacts/archive/index/.gitkeep | 1 + artifacts/github/README.md | 4 + docs/decisions/index.md | 3 + .../radar-artifact-release-archives.md | 35 ++++++ docs/index.md | 3 + docs/reference/workspace-layout.md | 5 + docs/runbook/index.md | 3 + docs/runbook/radar-artifact-archive.md | 71 +++++++++++ docs/spec/index.md | 3 + docs/spec/radar-artifact-retention.md | 116 ++++++++++++++++++ scripts/github/README.md | 5 + 13 files changed, 259 insertions(+) create mode 100644 artifacts/archive/README.md create mode 100644 artifacts/archive/index/.gitkeep create mode 100644 docs/decisions/radar-artifact-release-archives.md create mode 100644 docs/runbook/radar-artifact-archive.md create mode 100644 docs/spec/radar-artifact-retention.md diff --git a/README.md b/README.md index 27ec293b..a695806f 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ runtime. - `scripts/github/` owns deterministic GitHub bundle, release-delta, render, and validation scripts. - `artifacts/github/` owns checked-in GitHub bundles and editorial analysis drafts. +- `artifacts/archive/` owns checked-in recovery manifests for cold Radar batches stored + as GitHub Release assets. - `artifacts/social/` owns checked-in Publisher social draft artifacts. 
- `plugins/decodex/` owns the installable Decodex plugin and reusable agent-facing skills. diff --git a/artifacts/archive/README.md b/artifacts/archive/README.md new file mode 100644 index 00000000..2bfd9be0 --- /dev/null +++ b/artifacts/archive/README.md @@ -0,0 +1,8 @@ +# Archive Manifests + +This directory stores Git-tracked manifests for cold Radar archive batches. + +Compressed archive payloads do not live in Git. Store them as GitHub Release assets under +dedicated `radar-archive-*` tags, then keep the recovery manifest in `index/`. + +The governing contract is `docs/spec/radar-artifact-retention.md`. diff --git a/artifacts/archive/index/.gitkeep b/artifacts/archive/index/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/artifacts/archive/index/.gitkeep @@ -0,0 +1 @@ + diff --git a/artifacts/github/README.md b/artifacts/github/README.md index 877c1db7..9c09be53 100644 --- a/artifacts/github/README.md +++ b/artifacts/github/README.md @@ -6,5 +6,9 @@ This directory stores checked-in GitHub signal pipeline artifacts. - `analysis/` holds reviewed Codex editorial analysis drafts. - `impact/` holds optional `upstream_impact/v1` classifications. +`bundles/` and `analysis/` are hot raw artifact directories. Keep raw entries in Git for +at most 28 days, then move cold batches to dedicated `radar-archive-*` GitHub Release +assets and keep the recovery manifest under `artifacts/archive/index/`. + Executable automation for these artifacts lives under `scripts/github/`. Repo-local editorial instructions live under `dev/skills/github-signal/`. diff --git a/docs/decisions/index.md b/docs/decisions/index.md index cf34ac19..3e39f870 100644 --- a/docs/decisions/index.md +++ b/docs/decisions/index.md @@ -26,6 +26,9 @@ Question this index answers: "why was it designed this way?" 
- [`radar-control-plane-publisher.md`](./radar-control-plane-publisher.md) records the stable capability names for upstream Codex intelligence, retained-lane orchestration, and public publishing after the repository integration. +- [`radar-artifact-release-archives.md`](./radar-artifact-release-archives.md) records + why old raw Radar bundles and analysis drafts leave Git after 28 days and move to + dedicated GitHub Release assets with checked-in manifests. - [`static-public-site.md`](./static-public-site.md) records why the public Decodex site remains static while runtime/operator behavior stays in the CLI and local control plane. diff --git a/docs/decisions/radar-artifact-release-archives.md b/docs/decisions/radar-artifact-release-archives.md new file mode 100644 index 00000000..86b08526 --- /dev/null +++ b/docs/decisions/radar-artifact-release-archives.md @@ -0,0 +1,35 @@ +# Radar Artifact Release Archives + +Status: accepted + +Date: 2026-05-13 + +Question: Where should Decodex keep old raw Radar artifacts after the short Git hot +window? + +Decision: Keep raw GitHub bundles and editorial analysis drafts in Git for at most 28 +days, then archive cold batches as dedicated GitHub Release assets. The repository keeps +only a manifest under `artifacts/archive/index/`; compressed archives are not committed +to Git. + +Rationale: + +- Continuous Radar should inspect every upstream Codex commit, but the repository should + not become a permanent raw-data warehouse. +- Public signal entries, upstream-impact records, and published social drafts are small, + curated, reviewable artifacts and can remain in Git. +- Raw bundles and analysis drafts can be recovered from an archive asset when needed, as + long as the manifest records file paths, checksums, source commit, and release URL. +- GitHub Release assets are better than checked-in compressed archives because they keep + the Git tree readable while preserving a durable download location tied to the repo. 
+ +Consequences: + +- `artifacts/github/bundles/` and `artifacts/github/analysis/` are hot working + directories, not permanent history. +- Archive releases use a separate tag namespace such as `radar-archive-2026-05` so they + are not confused with Decodex product releases. +- Normal archive cleanup does not shrink existing Git history. A history rewrite remains + a separate explicit operation. +- Automation that prunes raw artifacts must add or update an archive manifest in the + same change that removes files from Git. diff --git a/docs/index.md b/docs/index.md index a26d3e3f..f2dbd37e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -40,6 +40,9 @@ The split below is by question type, not by human-versus-agent audience. `docs/decisions/` - Need the current Radar, Control Plane, and Publisher capability boundary -> `docs/decisions/radar-control-plane-publisher.md` +- Need Radar raw-artifact retention, archive manifests, or GitHub Release archive + procedure -> `docs/spec/radar-artifact-retention.md` and + `docs/runbook/radar-artifact-archive.md` - Need the raw machine-authored research run artifacts used by shipped research tooling -> `docs/research/` - Need reusable agent-facing Decodex usage instructions -> `plugins/decodex/` diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index eaad5b04..920f7e72 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -21,6 +21,7 @@ should not be treated as repository source. | `scripts/github/` | Deterministic GitHub collection, normalization, render, validation, and sync scripts for public signal content. | | `scripts/config/` | Repository automation scripts for config-derived artifacts. | | `artifacts/github/` | Checked-in GitHub change bundles and editorial analysis drafts used by the public signal pipeline. | +| `artifacts/archive/` | Checked-in manifests for cold Radar archive batches stored as GitHub Release assets. 
| | `artifacts/social/` | Checked-in Publisher social post drafts and publication evidence. | | `dev/skills/` | Repository-development skills for Radar upstream triage, code analysis, release analysis, GitHub signal drafting, and X post drafting. These are not part of installable plugin distribution. | | `plugins/decodex/` | Canonical installable Decodex plugin source and reusable agent-facing skills, including manual CLI, automation, commit, land, and labels. | @@ -85,6 +86,10 @@ skills remain manual Radar/Publisher reasoning surfaces unless a script explicit wires them into a checked-in contract. Generated GitHub bundles and analysis drafts live under `artifacts/github/` and must stay explicit and checked into the repository. +Raw bundles and analysis drafts are hot artifacts with a 28-day Git retention window. +Older raw batches move to dedicated GitHub Release assets, with recovery manifests kept +under `artifacts/archive/index/`. + `artifacts/github/impact/` may hold `upstream_impact/v1` classifications when an upstream Codex change has public-signal, Control Plane, or Publisher implications. `artifacts/social/` may hold `social_post_draft/v1` drafts before external publication. diff --git a/docs/runbook/index.md b/docs/runbook/index.md index fec3c2f0..ac428447 100644 --- a/docs/runbook/index.md +++ b/docs/runbook/index.md @@ -33,6 +33,9 @@ Question this index answers: "which sequence should I execute?" - [`local-github-signal-workflow.md`](./local-github-signal-workflow.md) for collecting GitHub change bundles, running Codex editorial analysis, validating signal entries, and publishing static site content. +- [`radar-artifact-archive.md`](./radar-artifact-archive.md) for moving raw Radar + bundles and analysis drafts out of Git after the 28-day hot window while keeping + release-asset recovery manifests checked in. 
- [`social-publishing-workflow.md`](./social-publishing-workflow.md) for turning Radar evidence into reviewed `@decodexspace` X drafts and recording publication evidence. - [`recover-review-handoff.md`](./recover-review-handoff.md) for diagnosing and diff --git a/docs/runbook/radar-artifact-archive.md b/docs/runbook/radar-artifact-archive.md new file mode 100644 index 00000000..7a024a36 --- /dev/null +++ b/docs/runbook/radar-artifact-archive.md @@ -0,0 +1,71 @@ +# Radar Artifact Archive + +Goal: Move old raw Radar artifacts out of Git after the 28-day hot window while +keeping public signals and archive recovery evidence available. + +Read this when: +- You are pruning `artifacts/github/bundles/` or `artifacts/github/analysis/`. +- You need to package cold Radar artifacts for a GitHub Release asset. +- You are reviewing whether a public signal can remain after its raw bundle leaves Git. + +Governing spec: +- [`../spec/radar-artifact-retention.md`](../spec/radar-artifact-retention.md) + +## Archive candidates + +Archive these after the 28-day hot window: + +- `artifacts/github/bundles/*.json` +- `artifacts/github/analysis/*.analysis.json` +- optional raw source snapshots if a future cache directory is added +- optional ledger exports if they are generated for a closed archive batch + +Do not archive these as part of raw cleanup: + +- `site/src/content/signals/*.json` +- the current `site/src/content/release-deltas/openai-codex-latest.json` +- `artifacts/github/impact/*.json` with active Control Plane or Publisher relevance +- approved or published `artifacts/social/x/*.json` +- `artifacts/archive/index/*.json` + +## Procedure + +1. Choose the archive window. + - Prefer a calendar month or a release-window name. + - Ensure the selected raw artifacts are outside the 28-day hot window. + - For artifacts without embedded collection timestamps, use the paired signal + `published_at` or record the operator-selected evidence date in the manifest. + +2. 
Build the archive directory. + - Preserve repository-relative paths inside the archive. + - Include paired bundle and analysis files together when both exist. + - Include a local `manifest.json` with `schema = "radar_archive_manifest/v1"`. + +3. Compress the archive. + - Preferred asset name: + `decodex-radar-archive-<archive_id>.tar.zst` + - Generate `SHA256SUMS`. + +4. Create a dedicated GitHub Release. + - Use a non-product tag such as `radar-archive-2026-05`. + - Upload the compressed archive, `manifest.json`, and `SHA256SUMS`. + - Do not reuse application release tags. + +5. Commit the repository cleanup. + - Add `artifacts/archive/index/<archive_id>.json`. + - Remove the archived raw files from `artifacts/github/bundles/` and + `artifacts/github/analysis/`. + - Keep public signals and curated impact/social artifacts in place. + +6. Verify recovery metadata. + - Confirm the manifest paths match the removed files. + - Confirm `SHA256SUMS` matches the uploaded archive asset. + - Confirm any public signal that still references a removed bundle has source refs + back to GitHub and an archive manifest pointer for raw recovery. + +## Operator notes + +Archiving raw files reduces future working-tree size and review noise. It does not +shrink historical Git objects that already contain the old JSON. A repository history +rewrite is a separate, explicit maintenance operation and should not be part of normal +monthly Radar archiving. diff --git a/docs/spec/index.md b/docs/spec/index.md index 3a313f98..0cd0c62a 100644 --- a/docs/spec/index.md +++ b/docs/spec/index.md @@ -64,6 +64,9 @@ Then keep the body explicit: by the static site. - [`release-delta.md`](./release-delta.md) defines the stable-versus-prerelease summary artifact used by the homepage release-delta module. +- [`radar-artifact-retention.md`](./radar-artifact-retention.md) defines the 28-day Git + hot window for raw Radar artifacts, the warm curated artifacts that stay in Git, and + the GitHub Release archive manifest contract.
- [`upstream-impact.md`](./upstream-impact.md) defines how Radar classifies upstream Codex changes for public signals, Control Plane follow-up, and Publisher angles. - [`social-post-draft.md`](./social-post-draft.md) defines the checked-in social draft diff --git a/docs/spec/radar-artifact-retention.md b/docs/spec/radar-artifact-retention.md new file mode 100644 index 00000000..a287d4fb --- /dev/null +++ b/docs/spec/radar-artifact-retention.md @@ -0,0 +1,116 @@ +# Radar Artifact Retention + +Purpose: Define which Decodex Radar and Publisher artifacts stay in Git, which raw +artifacts are kept only in a short hot window, and how cold archives remain +recoverable. + +Status: normative + +Read this when: +- You are deciding whether a GitHub bundle, analysis draft, signal entry, upstream + impact note, or social draft should remain checked in. +- You are preparing an archive batch for old Radar artifacts. +- You are adding automation that prunes or restores `artifacts/github/` material. + +Not this document: +- The GitHub change-bundle schema. +- The signal-entry schema. +- The step-by-step archive procedure. + +Defines: +- Hot, warm, and cold Radar artifact classes. +- The maximum Git hot-window for raw bundles and analysis drafts. +- The GitHub Release asset archive contract. +- The manifest record that keeps cold artifacts traceable from Git. + +## Retention classes + +Decodex uses three retention classes for Radar and Publisher data. + +| Class | Storage | Examples | Retention | +| --- | --- | --- | --- | +| Hot raw artifacts | Git working tree | `artifacts/github/bundles/*.json`, `artifacts/github/analysis/*.analysis.json` | At most 28 days in Git after collection or publication. 
| +| Warm curated artifacts | Git working tree | `site/src/content/signals/*.json`, `site/src/content/release-deltas/openai-codex-latest.json`, `artifacts/github/impact/*.json`, approved or published `artifacts/social/x/*.json` | Retained in Git while they are part of the public site, Control Plane review trail, or Publisher record. | +| Cold raw archive | GitHub Release assets plus a Git manifest | Archived bundle and analysis batches, optional source snapshots, optional ledger exports | Retained outside the Git tree. Git keeps only the manifest. | + +The hot raw window is intentionally short. Continuous Radar should keep every upstream +commit traceable, but it must not make the repository a permanent raw-data warehouse. + +## Hot raw artifact rule + +Raw GitHub bundles and local editorial analysis drafts must not remain in Git for more +than 28 days after collection or publication unless a human explicitly marks the batch +as still active. + +For existing artifacts that do not carry their own collection timestamp, the retention +clock should use the paired `signal_entry/v1.published_at` when available. If no paired +signal exists, the archive batch must record the operator-selected evidence date in its +manifest. + +The 28-day limit applies to the raw supporting material, not to the public signal +entry. A signal entry may outlive its raw bundle when the archive manifest preserves how +to recover the original bundle and analysis draft. 
+ +## Warm curated artifact rule + +Keep these artifacts in Git unless a separate content cleanup explicitly removes them: + +- published `signal_entry/v1` files under `site/src/content/signals/` +- the current homepage `release_delta/v1` artifact +- `upstream_impact/v1` records that affect Decodex Control Plane or Publisher follow-up +- approved or published `social_post_draft/v1` records +- archive manifests under `artifacts/archive/index/` + +Draft or rejected social artifacts may be archived after the same 28-day hot window +unless they document a still-useful editorial boundary. + +## Cold archive destination + +Cold raw artifacts must be stored as GitHub Release assets under a dedicated Radar +archive tag. They must not be committed as compressed archives inside the repository. + +Use tag names that cannot be confused with product releases, for example: + +- `radar-archive-2026-05` +- `radar-archive-rust-v0.130.0-to-rust-v0.131.0-alpha.9` + +Each archive release should include: + +- one compressed archive, preferably `decodex-radar-archive-<archive_id>.tar.zst` +- `manifest.json` +- `SHA256SUMS` +- optional detached signatures when the operator has signing material available + +Git keeps a copy of the manifest under `artifacts/archive/index/<archive_id>.json`. +That manifest is the durable pointer from the repository to the release assets. + +## Manifest contract + +The archive manifest schema identifier is: + +- `radar_archive_manifest/v1` + +The manifest must contain: + +| Field | Type | Notes | +| --- | --- | --- | +| `schema` | string | Must be `radar_archive_manifest/v1`. | +| `archive_id` | string | Stable archive identifier. | +| `created_at` | string | UTC timestamp for archive creation. | +| `retention_days` | number | Must be `28` unless a later spec changes the policy. | +| `source_commit` | string | Repository commit used to select and package files. | +| `release_tag` | string | GitHub tag holding the archive assets. 
| +| `release_url` | string | GitHub Release URL when available. | +| `archive_asset` | object | Name, size, and SHA-256 for the compressed archive. | +| `checksum_asset` | object | Name and SHA-256 for `SHA256SUMS`. | +| `files` | array | Archived file records. | + +Each `files[]` record must contain: + +- `path` +- `kind` (`bundle`, `analysis`, `source_cache`, `ledger_export`, or `other`) +- `sha256` +- `size_bytes` + +When the archive batch removes files from Git, the same commit must add the manifest +that points to the GitHub Release asset. diff --git a/scripts/github/README.md b/scripts/github/README.md index 1b505035..4d1e653b 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -72,3 +72,8 @@ Repo-local skills under `dev/skills/` are reasoning instructions for the Codex analysis step and for manual Radar/Publisher work. They do not introduce extra intermediate artifact schemas unless the conclusion is promoted into one of the checked-in contracts listed above. + +Raw bundles and analysis drafts are retained in Git for a 28-day hot window. Archive +older raw batches as dedicated `radar-archive-*` GitHub Release assets and commit only +the recovery manifest under `artifacts/archive/index/`. See +`docs/spec/radar-artifact-retention.md` and `docs/runbook/radar-artifact-archive.md`. 
From 86f8f22d4edfc6be4c29437a77570a892374d45b Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Wed, 13 May 2026 15:08:43 +0800 Subject: [PATCH 6/6] {"schema":"decodex/commit/1","summary":"Add Radar ledger and curated site feed","authority":"manual"} --- .gitignore | 1 + README.md | 4 + docs/index.md | 2 + docs/reference/workspace-layout.md | 9 + docs/runbook/local-github-signal-workflow.md | 17 +- docs/spec/index.md | 2 + docs/spec/radar-ledger.md | 94 ++++ docs/spec/release-delta.md | 6 + docs/spec/signal-entry.md | 16 + docs/spec/site-contract.md | 9 + scripts/github/README.md | 9 + scripts/github/radar_ledger.py | 545 +++++++++++++++++++ scripts/github/sync_latest_signals.py | 189 +++++-- scripts/github/test_radar_ledger.py | 115 ++++ site/src/components/ReleaseDeltaPanel.astro | 28 +- site/src/lib/signal-feed.ts | 27 + site/src/pages/index.astro | 14 +- site/src/styles/global.css | 7 + 18 files changed, 1035 insertions(+), 59 deletions(-) create mode 100644 docs/spec/radar-ledger.md create mode 100644 scripts/github/radar_ledger.py create mode 100644 scripts/github/test_radar_ledger.py diff --git a/.gitignore b/.gitignore index b980b293..e400ab0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # AI .codex +.decodex # Editor .vscode diff --git a/README.md b/README.md index a695806f..639b6b5f 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ Runtime authority stays in `apps/decodex/src/`, the registered project contracts Public site authority stays in `site/`, `scripts/github/`, `artifacts/github/`, and the site/content specs. +Historical Radar trace is local by default. `scripts/github/sync_latest_signals.py` +writes `.decodex/radar.sqlite3` so every inspected upstream commit can be tracked +without publishing every low-level or skipped item to the static site or Git history. + ## Runtime platform support - The Decodex runtime contract is Unix-only: macOS and Linux. 
diff --git a/docs/index.md b/docs/index.md index f2dbd37e..a36b9d25 100644 --- a/docs/index.md +++ b/docs/index.md @@ -43,6 +43,8 @@ The split below is by question type, not by human-versus-agent audience. - Need Radar raw-artifact retention, archive manifests, or GitHub Release archive procedure -> `docs/spec/radar-artifact-retention.md` and `docs/runbook/radar-artifact-archive.md` +- Need historical upstream commit trace, skipped-candidate state, or local Radar ledger + behavior -> `docs/spec/radar-ledger.md` - Need the raw machine-authored research run artifacts used by shipped research tooling -> `docs/research/` - Need reusable agent-facing Decodex usage instructions -> `plugins/decodex/` diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md index 920f7e72..ab87a064 100644 --- a/docs/reference/workspace-layout.md +++ b/docs/reference/workspace-layout.md @@ -118,6 +118,15 @@ Runtime state that belongs to the local operator, not to this repository, lives - `accounts.jsonl` stores the optional shared ChatGPT account pool used for Codex app-server auth token injection and refresh. - `logs/` stores Decodex process logs. + +Repo-local Radar history that belongs to the current checkout, not to Git, lives under +`.decodex/`: + +- `radar.sqlite3` is the default SQLite ledger for observed upstream Codex commits, + skipped candidates, PR mappings, review status, and artifact links. + +`.decodex/` is ignored by Git. Public curated artifacts and archive manifests remain in +the checked-in tree. - `agent-evidence//` stores local agent-readable diagnosis artifacts, including `handoff-index.json`, `events.jsonl`, `blockers/*.json`, and `runs///capsule.json`. 
diff --git a/docs/runbook/local-github-signal-workflow.md b/docs/runbook/local-github-signal-workflow.md index 407e51a3..7453023d 100644 --- a/docs/runbook/local-github-signal-workflow.md +++ b/docs/runbook/local-github-signal-workflow.md @@ -17,6 +17,7 @@ Depends on: - `docs/spec/github-change-bundle.md` - `docs/spec/signal-entry.md` - `docs/spec/release-delta.md` +- `docs/spec/radar-ledger.md` - `docs/spec/site-contract.md` - `dev/skills/README.md` @@ -29,7 +30,9 @@ Outputs: ## Workflow 1. Track upstream Codex commits continuously. Treat each commit as a candidate to - understand, then resolve it back to a PR when possible. + understand, then resolve it back to a PR when possible. The sync writes + `.decodex/radar.sqlite3` by default so skipped and deferred commits remain + traceable without becoming public site entries. 2. Triage upstream activity with `dev/skills/codex-upstream-triage/` when the candidate is not already chosen by automation or by the operator. 3. Build a normalized GitHub change bundle under `artifacts/github/bundles/` for @@ -133,6 +136,13 @@ Automated sync entrypoint: - `scripts/github/sync_latest_signals.py` +Bootstrap or inspect local historical trace: + +```bash +python3 scripts/github/radar_ledger.py ingest-existing +python3 scripts/github/radar_ledger.py summary --json +``` + ## Editorial gate Publish only when the change meets at least one of these tests: @@ -140,6 +150,11 @@ Publish only when the change meets at least one of these tests: - it introduces a new capability - it changes user-visible behavior - it offers a clear try-now path +- it explains deprecated, removed, legacy, or migration-relevant behavior + +The homepage feed applies the same posture programmatically: low-impact internal +changes without a try path, capability value, or deprecated/migration cue stay out of +the public feed while remaining available to the ledger and release rollups. 
Skip or defer entries for: diff --git a/docs/spec/index.md b/docs/spec/index.md index 0cd0c62a..1618bf57 100644 --- a/docs/spec/index.md +++ b/docs/spec/index.md @@ -64,6 +64,8 @@ Then keep the body explicit: by the static site. - [`release-delta.md`](./release-delta.md) defines the stable-versus-prerelease summary artifact used by the homepage release-delta module. +- [`radar-ledger.md`](./radar-ledger.md) defines the local SQLite ledger that keeps + every observed upstream Codex commit traceable without storing all raw history in Git. - [`radar-artifact-retention.md`](./radar-artifact-retention.md) defines the 28-day Git hot window for raw Radar artifacts, the warm curated artifacts that stay in Git, and the GitHub Release archive manifest contract. diff --git a/docs/spec/radar-ledger.md b/docs/spec/radar-ledger.md new file mode 100644 index 00000000..ba5bd022 --- /dev/null +++ b/docs/spec/radar-ledger.md @@ -0,0 +1,94 @@ +# Radar Ledger + +Purpose: Define the local SQLite ledger that keeps every observed upstream Codex commit +traceable without putting every raw or low-value artifact into Git. + +Status: normative + +Read this when: +- You are changing `scripts/github/sync_latest_signals.py`. +- You are importing existing GitHub bundles, analysis drafts, or signal entries into + historical Radar state. +- You need to decide what belongs in local history instead of checked-in public + artifacts. + +Not this document: +- The public `signal_entry/v1` schema. +- The raw-artifact archive procedure. +- The release-delta homepage rendering contract. + +Defines: +- The local ledger path. +- The required ledger tables. +- The relationship between observed commits, reviews, and checked-in artifacts. +- The rule that every upstream commit can have durable trace without becoming a public + site entry. + +## Local storage + +The default local Radar ledger path is: + +```text +.decodex/radar.sqlite3 +``` + +`.decodex/` is ignored by Git. 
The ledger is local or CI runtime state, not a checked-in +artifact. It may be rebuilt from checked-in warm artifacts and cold archive manifests, +but it is the preferred place for high-frequency trace and skip history. + +## Schema + +The schema is created by `scripts/github/radar_ledger.py`. + +Required tables: + +| Table | Purpose | +| --- | --- | +| `upstream_commit` | One row per observed upstream commit, including SHA, title, URL, commit time, PR number when known, and first/last seen timestamps. | +| `radar_review` | One current review state per commit or PR subject. Status values include `seen`, `skipped`, `watch`, `signal`, `control_plane`, `social`, `deprecated`, and `archived`. | +| `artifact_link` | Links commits or PRs to Git-tracked or archived artifacts, including file path, artifact kind, SHA-256, size, and creation time. | +| `source_cache` | Optional source cache index for fetched remote payloads when a future cache is added. | + +The ledger schema version is stored in the `metadata` table under the `schema_version` key. + +## Artifact boundary + +Use the ledger for: + +- every recent upstream commit observed by continuous Radar +- commits skipped because they are low-signal maintenance +- positive candidates deferred by run budget +- mappings from commits to PRs +- links from commits or PRs to bundles, analysis drafts, signals, impact notes, social + drafts, release deltas, archive manifests, or ledger exports + +Use Git for: + +- curated public site signals +- current release-delta data +- upstream-impact records that affect Decodex Control Plane or Publisher follow-up +- approved or published social drafts +- cold archive manifests + +Do not use Git as the permanent store for every raw bundle, raw source cache, skipped +candidate, retry queue, or long low-value analysis. + +## Sync behavior + +`scripts/github/sync_latest_signals.py` writes the local ledger by default. It records +every recent commit it inspects, including commits that do not become public signals. 
+ +Operators may disable ledger writes with: + +```sh +python3 scripts/github/sync_latest_signals.py --no-ledger +``` + +Existing checked-in artifacts can be imported with: + +```sh +python3 scripts/github/radar_ledger.py ingest-existing +``` + +This import is useful when bootstrapping a new local workspace or rebuilding trace after +raw GitHub bundles move to cold archive assets. diff --git a/docs/spec/release-delta.md b/docs/spec/release-delta.md index ff492021..39213a2c 100644 --- a/docs/spec/release-delta.md +++ b/docs/spec/release-delta.md @@ -147,3 +147,9 @@ in a denser visual style, but it should not silently truncate the list to only a small fixed subset. The release-delta module must remain subordinate to the overall page hierarchy. It may summarize the delta, but it must not replace the main signal feed. + +If the latest stable-to-prerelease pair has no matching public signals yet, the +homepage may choose a signal-bearing comparison as the initial visible panel. The latest +pair must remain available in the comparator options, and its tracked-signal count must +not be inflated to make the latest pair look analyzed when no matching signal evidence +exists. diff --git a/docs/spec/signal-entry.md b/docs/spec/signal-entry.md index dbbd69f3..fecaacf8 100644 --- a/docs/spec/signal-entry.md +++ b/docs/spec/signal-entry.md @@ -89,3 +89,19 @@ Commit-only signals should include: - `why_it_matters` must describe user value, not internal implementation mechanics alone. - `confidence = "weak"` is allowed only when the entry clearly signals uncertainty. - `impact` and `confidence` must be rendered on the homepage card. + +## Homepage inclusion rule + +The signal collection may contain more entries than the homepage feed renders. 
The +homepage feed includes entries that meet at least one of these conditions: + +- `impact` is `medium` or `high` +- `kind` is `try_now` +- `how_to_try` is present +- `config_flags` is non-empty +- the entry is a confirmed capability +- the entry describes deprecated, removed, legacy, rollback, disabled, or + migration-relevant behavior + +Other low-impact entries may remain checked in for release rollups, source trace, or +archive recovery, but they should not dominate the public feed. diff --git a/docs/spec/site-contract.md b/docs/spec/site-contract.md index 8a786d00..97cfae7f 100644 --- a/docs/spec/site-contract.md +++ b/docs/spec/site-contract.md @@ -49,6 +49,15 @@ The homepage must remain scan-first. Large marketing hero sections, dashboard-st The release-delta module must summarize the latest stable release, the latest prerelease, and the tracked signal differences unlocked by the prerelease without displacing the primary feed. +The primary feed is curated for community-ready signals, not every analyzed upstream +commit. Low-impact internal changes without a try path, capability value, or +deprecated/migration cue may stay in the signal collection, Radar ledger, or release +rollup inputs without appearing in the homepage feed. + +When the latest stable-to-prerelease pair has no matching published public signals, the +homepage may default the comparator to the most recent signal-bearing pair while keeping +the latest pair visible in the comparator options. 
+ ## Allowed filters The MVP filter set is: diff --git a/scripts/github/README.md b/scripts/github/README.md index 4d1e653b..449f3c5a 100644 --- a/scripts/github/README.md +++ b/scripts/github/README.md @@ -7,6 +7,7 @@ Current scripts: - `build_change_bundle.py` - `build_release_delta.py` - `backfill_release_range.py` +- `radar_ledger.py` - `run_codex_analysis.py` - `sync_latest_signals.py` - `validate_change_bundle.py` @@ -53,6 +54,14 @@ python3 scripts/github/sync_latest_signals.py \ --max-new-prs 3 ``` +The sync writes a local SQLite Radar ledger at `.decodex/radar.sqlite3` by default. +Use `--no-ledger` only for throwaway runs. To bootstrap the ledger from existing +checked-in artifacts: + +```bash +python3 scripts/github/radar_ledger.py ingest-existing +``` + Release-window gap fill: ```bash diff --git a/scripts/github/radar_ledger.py b/scripts/github/radar_ledger.py new file mode 100644 index 00000000..f6c032d1 --- /dev/null +++ b/scripts/github/radar_ledger.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 +"""Maintain the local Decodex Radar SQLite ledger.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import re +import sqlite3 +import sys +from pathlib import Path +from typing import Any + +SCRIPT_HOME = Path(__file__).resolve().parent +if str(SCRIPT_HOME) not in sys.path: + sys.path.insert(0, str(SCRIPT_HOME)) + +from contracts import load_json, utc_now_iso, validate_bundle, validate_signal # noqa: E402 + +SCHEMA_VERSION = 1 +DEFAULT_LEDGER_PATH = ".decodex/radar.sqlite3" +COMMIT_URL_RE = re.compile(r"/commit/([0-9a-f]{7,40})$") +PR_URL_RE = re.compile(r"/pull/(\d+)$") +SUBJECT_KINDS = {"commit", "pr"} +REVIEW_STATUSES = { + "seen", + "skipped", + "watch", + "signal", + "control_plane", + "social", + "deprecated", + "archived", +} +CONFIDENCE_VALUES = {"confirmed", "likely", "weak"} +ARTIFACT_KINDS = { + "bundle", + "analysis", + "signal", + "upstream_impact", + "social_draft", + "release_delta", + "archive_manifest", 
+ "ledger_export", +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db", default=DEFAULT_LEDGER_PATH, help="SQLite ledger path.") + subcommands = parser.add_subparsers(dest="command", required=True) + + subcommands.add_parser("init", help="Initialize the ledger schema.") + + ingest = subcommands.add_parser("ingest", help="Ingest one bundle and optional derived artifacts.") + ingest.add_argument("--bundle", required=True, help="Path to a github_change_bundle/v1 JSON file.") + ingest.add_argument("--analysis", help="Optional analysis draft path.") + ingest.add_argument("--signal", help="Optional rendered signal_entry/v1 path.") + + ingest_existing = subcommands.add_parser( + "ingest-existing", + help="Ingest existing checked-in bundles, analyses, and signals.", + ) + ingest_existing.add_argument("--bundles-dir", default="artifacts/github/bundles") + ingest_existing.add_argument("--analysis-dir", default="artifacts/github/analysis") + ingest_existing.add_argument("--signals-dir", default="site/src/content/signals") + + summary = subcommands.add_parser("summary", help="Print ledger counts.") + summary.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + + return parser.parse_args() + + +def connect(path: str | Path) -> sqlite3.Connection: + db_path = Path(path) + db_path.parent.mkdir(parents=True, exist_ok=True) + connection = sqlite3.connect(db_path) + connection.row_factory = sqlite3.Row + initialize(connection) + return connection + + +def initialize(connection: sqlite3.Connection) -> None: + connection.executescript( + """ + PRAGMA foreign_keys = ON; + + CREATE TABLE IF NOT EXISTS metadata ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS upstream_commit ( + repo TEXT NOT NULL, + sha TEXT NOT NULL, + title TEXT NOT NULL, + url TEXT NOT NULL, + committed_at TEXT, + pr_number INTEGER, + first_seen_at TEXT NOT NULL, + 
last_seen_at TEXT NOT NULL, + PRIMARY KEY (repo, sha) + ); + + CREATE TABLE IF NOT EXISTS radar_review ( + repo TEXT NOT NULL, + subject_kind TEXT NOT NULL CHECK (subject_kind IN ('commit', 'pr')), + subject_id TEXT NOT NULL, + status TEXT NOT NULL CHECK ( + status IN ( + 'seen', + 'skipped', + 'watch', + 'signal', + 'control_plane', + 'social', + 'deprecated', + 'archived' + ) + ), + reason TEXT NOT NULL DEFAULT '', + confidence TEXT CHECK (confidence IN ('confirmed', 'likely', 'weak')), + reviewed_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + PRIMARY KEY (repo, subject_kind, subject_id) + ); + + CREATE TABLE IF NOT EXISTS artifact_link ( + repo TEXT NOT NULL, + subject_kind TEXT NOT NULL CHECK (subject_kind IN ('commit', 'pr')), + subject_id TEXT NOT NULL, + artifact_kind TEXT NOT NULL CHECK ( + artifact_kind IN ( + 'bundle', + 'analysis', + 'signal', + 'upstream_impact', + 'social_draft', + 'release_delta', + 'archive_manifest', + 'ledger_export' + ) + ), + path TEXT NOT NULL, + sha256 TEXT NOT NULL, + size_bytes INTEGER NOT NULL, + created_at TEXT NOT NULL, + PRIMARY KEY (repo, subject_kind, subject_id, artifact_kind, path) + ); + + CREATE TABLE IF NOT EXISTS source_cache ( + url TEXT PRIMARY KEY, + etag TEXT, + body_sha256 TEXT NOT NULL, + fetched_at TEXT NOT NULL, + cache_path TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_upstream_commit_pr + ON upstream_commit (repo, pr_number); + + CREATE INDEX IF NOT EXISTS idx_radar_review_status + ON radar_review (status, reviewed_at); + """ + ) + connection.execute( + """ + INSERT INTO metadata (key, value) + VALUES ('schema_version', ?) 
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value + """, + (str(SCHEMA_VERSION),), + ) + connection.commit() + + +def path_for_storage(path: str | Path) -> str: + resolved = Path(path).resolve() + cwd = Path.cwd().resolve() + try: + return str(resolved.relative_to(cwd)) + except ValueError: + return str(resolved) + + +def file_digest(path: str | Path) -> tuple[str, int]: + payload = Path(path).read_bytes() + return hashlib.sha256(payload).hexdigest(), len(payload) + + +def require_member(value: str, allowed: set[str], label: str) -> None: + if value not in allowed: + raise ValueError(f"{label} must be one of {sorted(allowed)}") + + +def record_commit( + connection: sqlite3.Connection, + *, + repo: str, + sha: str, + title: str, + url: str, + committed_at: str | None = None, + pr_number: int | None = None, + seen_at: str | None = None, +) -> None: + timestamp = seen_at or utc_now_iso() + connection.execute( + """ + INSERT INTO upstream_commit ( + repo, + sha, + title, + url, + committed_at, + pr_number, + first_seen_at, + last_seen_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(repo, sha) DO UPDATE SET + title = excluded.title, + url = excluded.url, + committed_at = COALESCE(excluded.committed_at, upstream_commit.committed_at), + pr_number = COALESCE(excluded.pr_number, upstream_commit.pr_number), + last_seen_at = excluded.last_seen_at + """, + (repo, sha, title, url, committed_at, pr_number, timestamp, timestamp), + ) + + +def record_review( + connection: sqlite3.Connection, + *, + repo: str, + subject_kind: str, + subject_id: str, + status: str, + reason: str, + confidence: str | None = None, + reviewed_at: str | None = None, +) -> None: + require_member(subject_kind, SUBJECT_KINDS, "subject_kind") + require_member(status, REVIEW_STATUSES, "status") + if confidence is not None: + require_member(confidence, CONFIDENCE_VALUES, "confidence") + timestamp = reviewed_at or utc_now_iso() + connection.execute( + """ + INSERT INTO radar_review ( + repo, + subject_kind, + subject_id, + status, + reason, + confidence, + reviewed_at, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(repo, subject_kind, subject_id) DO UPDATE SET + status = excluded.status, + reason = excluded.reason, + confidence = excluded.confidence, + reviewed_at = excluded.reviewed_at, + updated_at = excluded.updated_at + """, + (repo, subject_kind, subject_id, status, reason, confidence, timestamp, timestamp), + ) + + +def record_artifact( + connection: sqlite3.Connection, + *, + repo: str, + subject_kind: str, + subject_id: str, + artifact_kind: str, + path: str | Path, + created_at: str | None = None, +) -> None: + require_member(subject_kind, SUBJECT_KINDS, "subject_kind") + require_member(artifact_kind, ARTIFACT_KINDS, "artifact_kind") + digest, size_bytes = file_digest(path) + connection.execute( + """ + INSERT INTO artifact_link ( + repo, + subject_kind, + subject_id, + artifact_kind, + path, + sha256, + size_bytes, + created_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(repo, subject_kind, subject_id, artifact_kind, path) DO UPDATE SET + sha256 = excluded.sha256, + size_bytes = excluded.size_bytes, + created_at = excluded.created_at + """, + ( + repo, + subject_kind, + subject_id, + artifact_kind, + path_for_storage(path), + digest, + size_bytes, + created_at or utc_now_iso(), + ), + ) + + +def subject_for_bundle(bundle: dict[str, Any]) -> tuple[str, str, str]: + repo = bundle["repo"] + primary_pr = bundle.get("primary_pr") + if isinstance(primary_pr, dict) and isinstance(primary_pr.get("number"), int): + return repo, "pr", str(primary_pr["number"]) + first_commit = bundle["commits"][0] + return repo, "commit", first_commit["sha"] + + +def record_bundle( + connection: sqlite3.Connection, + bundle: dict[str, Any], + bundle_path: str | Path, + *, + status: str = "watch", + reason: str = "Imported from GitHub change bundle.", +) -> tuple[str, str, str]: + validation = validate_bundle(bundle) + if not validation.ok: + raise ValueError("Bundle validation failed:\n- " + "\n- ".join(validation.errors)) + + repo, subject_kind, subject_id = subject_for_bundle(bundle) + primary_pr = bundle.get("primary_pr") + pr_number = primary_pr.get("number") if isinstance(primary_pr, dict) else None + for commit in bundle["commits"]: + record_commit( + connection, + repo=repo, + sha=commit["sha"], + title=commit["message"], + url=commit["url"], + committed_at=commit.get("committed_at"), + pr_number=pr_number if isinstance(pr_number, int) else None, + ) + record_review( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + status=status, + reason=reason, + confidence="confirmed" if status == "signal" else None, + ) + record_artifact( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + artifact_kind="bundle", + path=bundle_path, + ) + return repo, subject_kind, subject_id + + +def subject_refs_for_signal(signal: dict[str, Any]) -> list[tuple[str, str, str]]: + refs = 
signal.get("source_refs", {}) + repo = refs.get("repo") + if not isinstance(repo, str): + return [] + subjects: list[tuple[str, str, str]] = [] + pr_url = refs.get("pr_url") + if isinstance(pr_url, str): + match = PR_URL_RE.search(pr_url) + if match: + subjects.append((repo, "pr", match.group(1))) + for url in refs.get("commit_urls", []): + if not isinstance(url, str): + continue + match = COMMIT_URL_RE.search(url) + if match: + subjects.append((repo, "commit", match.group(1))) + return subjects + + +def record_signal_artifact(connection: sqlite3.Connection, signal_path: str | Path) -> list[tuple[str, str, str]]: + signal = load_json(signal_path) + validation = validate_signal(signal) + if not validation.ok: + raise ValueError(f"Signal validation failed for {signal_path}:\n- " + "\n- ".join(validation.errors)) + + subjects = subject_refs_for_signal(signal) + for repo, subject_kind, subject_id in subjects: + record_review( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + status="signal", + reason=f"Published signal_entry/v1: {signal['slug']}", + confidence=signal["confidence"], + ) + record_artifact( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + artifact_kind="signal", + path=signal_path, + ) + return subjects + + +def ingest_artifact_set( + connection: sqlite3.Connection, + *, + bundle_path: str | Path, + analysis_path: str | Path | None = None, + signal_path: str | Path | None = None, +) -> None: + bundle = load_json(bundle_path) + signal_exists = signal_path is not None and Path(signal_path).exists() + repo, subject_kind, subject_id = record_bundle( + connection, + bundle, + bundle_path, + status="signal" if signal_exists else "watch", + reason="Imported from generated Radar artifacts.", + ) + if analysis_path is not None and Path(analysis_path).exists(): + record_artifact( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + artifact_kind="analysis", + 
path=analysis_path, + ) + if signal_exists: + signal_subjects = record_signal_artifact(connection, signal_path) + if (repo, subject_kind, subject_id) not in signal_subjects: + record_artifact( + connection, + repo=repo, + subject_kind=subject_kind, + subject_id=subject_id, + artifact_kind="signal", + path=signal_path, + ) + + +def ingest_existing( + connection: sqlite3.Connection, + *, + bundles_dir: str | Path, + analysis_dir: str | Path, + signals_dir: str | Path, +) -> dict[str, int]: + bundles_path = Path(bundles_dir) + analysis_path = Path(analysis_dir) + signals_path = Path(signals_dir) + ingested = 0 + for bundle_path in sorted(bundles_path.glob("*.json")): + stem = bundle_path.stem + candidate_analysis = analysis_path / f"{stem}.analysis.json" + candidate_signal = signals_path / f"{stem}.json" + ingest_artifact_set( + connection, + bundle_path=bundle_path, + analysis_path=candidate_analysis if candidate_analysis.exists() else None, + signal_path=candidate_signal if candidate_signal.exists() else None, + ) + ingested += 1 + + linked_signal_paths = {signals_path / f"{path.stem}.json" for path in bundles_path.glob("*.json")} + for signal_path in sorted(signals_path.glob("*.json")): + if signal_path in linked_signal_paths: + continue + record_signal_artifact(connection, signal_path) + + connection.commit() + return {**summary_counts(connection), "bundles_ingested": ingested} + + +def summary_counts(connection: sqlite3.Connection) -> dict[str, int]: + tables = { + "upstream_commits": "upstream_commit", + "radar_reviews": "radar_review", + "artifact_links": "artifact_link", + "source_cache_entries": "source_cache", + } + result: dict[str, int] = {} + for key, table in tables.items(): + row = connection.execute(f"SELECT COUNT(*) AS count FROM {table}").fetchone() + result[key] = int(row["count"]) + return result + + +def print_summary(connection: sqlite3.Connection, *, as_json: bool) -> None: + payload = summary_counts(connection) + if as_json: + 
print(json.dumps(payload, indent=2, sort_keys=True)) + return + for key, value in payload.items(): + print(f"{key}\t{value}") + + +def main() -> None: + args = parse_args() + connection = connect(args.db) + try: + if args.command == "init": + print(args.db) + elif args.command == "ingest": + ingest_artifact_set( + connection, + bundle_path=args.bundle, + analysis_path=args.analysis, + signal_path=args.signal, + ) + connection.commit() + print_summary(connection, as_json=True) + elif args.command == "ingest-existing": + payload = ingest_existing( + connection, + bundles_dir=args.bundles_dir, + analysis_dir=args.analysis_dir, + signals_dir=args.signals_dir, + ) + print(json.dumps(payload, indent=2, sort_keys=True)) + elif args.command == "summary": + print_summary(connection, as_json=args.json) + else: + raise SystemExit(f"unknown command: {args.command}") + finally: + connection.close() + + +if __name__ == "__main__": + main() diff --git a/scripts/github/sync_latest_signals.py b/scripts/github/sync_latest_signals.py index fe0167fb..e1a7738a 100644 --- a/scripts/github/sync_latest_signals.py +++ b/scripts/github/sync_latest_signals.py @@ -26,6 +26,13 @@ routed_token_env, ) from contracts import dump_json, load_json, validate_signal # noqa: E402 +from radar_ledger import ( # noqa: E402 + DEFAULT_LEDGER_PATH, + connect as connect_ledger, + ingest_artifact_set, + record_commit, + record_review, +) COMMIT_URL_RE = re.compile(r"/commit/([0-9a-f]{7,40})$") PR_URL_RE = re.compile(r"/pull/(\d+)$") @@ -85,6 +92,12 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--token-env", help="Environment variable containing a GitHub token.") parser.add_argument("--codex-bin", default="codex", help="Codex executable to invoke.") parser.add_argument("--model", help="Optional Codex model override.") + parser.add_argument( + "--ledger", + default=DEFAULT_LEDGER_PATH, + help="Local SQLite Radar ledger path. 
Defaults to .decodex/radar.sqlite3.", + ) + parser.add_argument("--no-ledger", action="store_true", help="Disable local Radar ledger writes.") parser.add_argument( "--refresh-release-delta", action="store_true", @@ -214,64 +227,137 @@ def main() -> None: token_env = args.token_env or routed_token_env() or "GITHUB_TOKEN" token = os.environ.get(token_env) root = repo_root() + ledger_path = None if args.no_ledger else Path(args.ledger) + if ledger_path is not None and not ledger_path.is_absolute(): + ledger_path = root / ledger_path + ledger = connect_ledger(ledger_path) if ledger_path is not None else None signals_dir = (root / args.signals_dir).resolve() published_prs = published_pr_numbers(signals_dir) published_shas = published_commit_shas(signals_dir) commits = recent_commits(args.repo, token, args.search_limit) candidates: list[dict[str, Any]] = [] seen_candidate_keys: set[tuple[str, int | str]] = set() - for commit in commits: - if commit["sha"] in published_shas: - continue - pr_number = maybe_promote_commit_to_pr(args.repo, commit["sha"], token) - if pr_number is not None and pr_number in published_prs: - continue - candidate_key: tuple[str, int | str] = ( - ("pr", pr_number) if pr_number is not None else ("commit", commit["sha"]) - ) - if candidate_key in seen_candidate_keys: - continue - seen_candidate_keys.add(candidate_key) - candidates.append({**commit, "pr_number": pr_number}) - - unpublished = candidates - unpublished = [item for item in unpublished if candidate_score(item["title"]) > 0][: args.max_new_prs] - - created = 0 - for candidate in reversed(unpublished): - bundle_path, analysis_path, signal_path = signal_paths(candidate, args) - notes = [f"Discovered via continuous upstream commit sync: {candidate['url']}"] - pr_number = candidate.get("pr_number") - bundle = ( - build_pr_bundle(args.repo, pr_number, token, notes) - if isinstance(pr_number, int) - else build_commit_bundle(args.repo, candidate["sha"], token, notes) - ) - dump_json(root / 
bundle_path, bundle) - - run_script( - "run_codex_analysis.py", - "--bundle", - str(root / bundle_path), - "--out", - str(root / analysis_path), - "--repo-root", - str(root), - "--codex-bin", - args.codex_bin, - *(["--model", args.model] if args.model else []), - ) + try: + for commit in commits: + pr_number = maybe_promote_commit_to_pr(args.repo, commit["sha"], token) + subject_kind = "pr" if pr_number is not None else "commit" + subject_id = str(pr_number) if pr_number is not None else commit["sha"] + if ledger is not None: + record_commit( + ledger, + repo=args.repo, + sha=commit["sha"], + title=commit["title"], + url=commit["url"], + committed_at=commit.get("committed_at"), + pr_number=pr_number, + ) + if commit["sha"] in published_shas or (pr_number is not None and pr_number in published_prs): + if ledger is not None: + record_review( + ledger, + repo=args.repo, + subject_kind=subject_kind, + subject_id=subject_id, + status="signal", + reason="Already present in published signal collection.", + confidence="confirmed", + ) + continue + candidate_key: tuple[str, int | str] = ( + ("pr", pr_number) if pr_number is not None else ("commit", commit["sha"]) + ) + if candidate_key in seen_candidate_keys: + if ledger is not None: + record_review( + ledger, + repo=args.repo, + subject_kind=subject_kind, + subject_id=subject_id, + status="seen", + reason="Duplicate recent commit for an already considered PR.", + ) + continue + seen_candidate_keys.add(candidate_key) + score = candidate_score(commit["title"]) + if score <= 0: + if ledger is not None: + record_review( + ledger, + repo=args.repo, + subject_kind=subject_kind, + subject_id=subject_id, + status="skipped", + reason=f"Recent commit title scored {score}; no public signal was generated.", + confidence="likely", + ) + continue + candidates.append({**commit, "pr_number": pr_number, "score": score}) - run_script( - "render_signal_entry.py", - "--bundle", - str(root / bundle_path), - "--analysis", - str(root / 
analysis_path), - "--out", - str(root / signal_path), - ) - created += 1 + unpublished = candidates[: args.max_new_prs] + for candidate in candidates[args.max_new_prs :]: + if ledger is None: + continue + pr_number = candidate.get("pr_number") + subject_kind = "pr" if isinstance(pr_number, int) else "commit" + subject_id = str(pr_number) if isinstance(pr_number, int) else candidate["sha"] + record_review( + ledger, + repo=args.repo, + subject_kind=subject_kind, + subject_id=subject_id, + status="watch", + reason="Positive Radar candidate left for a later sync budget.", + confidence="likely", + ) + + created = 0 + for candidate in reversed(unpublished): + bundle_path, analysis_path, signal_path = signal_paths(candidate, args) + notes = [f"Discovered via continuous upstream commit sync: {candidate['url']}"] + pr_number = candidate.get("pr_number") + bundle = ( + build_pr_bundle(args.repo, pr_number, token, notes) + if isinstance(pr_number, int) + else build_commit_bundle(args.repo, candidate["sha"], token, notes) + ) + dump_json(root / bundle_path, bundle) + + run_script( + "run_codex_analysis.py", + "--bundle", + str(root / bundle_path), + "--out", + str(root / analysis_path), + "--repo-root", + str(root), + "--codex-bin", + args.codex_bin, + *(["--model", args.model] if args.model else []), + ) + + run_script( + "render_signal_entry.py", + "--bundle", + str(root / bundle_path), + "--analysis", + str(root / analysis_path), + "--out", + str(root / signal_path), + ) + if ledger is not None: + ingest_artifact_set( + ledger, + bundle_path=root / bundle_path, + analysis_path=root / analysis_path, + signal_path=root / signal_path, + ) + created += 1 + if ledger is not None: + ledger.commit() + finally: + if ledger is not None: + ledger.close() run_script("validate_signal_entry.py", str(root / args.signals_dir)) release_delta_refreshed = ( @@ -289,6 +375,7 @@ def main() -> None: "unpublished_changes_considered": len(candidates), "new_signals_created": created, 
"release_delta_refreshed": release_delta_refreshed, + "ledger": str(ledger_path) if ledger_path is not None else None, }, sort_keys=True, ) diff --git a/scripts/github/test_radar_ledger.py b/scripts/github/test_radar_ledger.py new file mode 100644 index 00000000..ed204c52 --- /dev/null +++ b/scripts/github/test_radar_ledger.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + +MODULE_PATH = Path(__file__).resolve().with_name("radar_ledger.py") +MODULE_SPEC = importlib.util.spec_from_file_location("radar_ledger", MODULE_PATH) +if MODULE_SPEC is None or MODULE_SPEC.loader is None: + raise RuntimeError(f"Unable to load {MODULE_PATH}") +radar_ledger = importlib.util.module_from_spec(MODULE_SPEC) +MODULE_SPEC.loader.exec_module(radar_ledger) + + +class RadarLedgerTests(unittest.TestCase): + def write_json(self, path: Path, payload: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + def test_ingests_existing_bundle_analysis_and_signal(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + bundle_path = root / "artifacts/github/bundles/openai-codex-pr-123.json" + analysis_path = root / "artifacts/github/analysis/openai-codex-pr-123.analysis.json" + signal_path = root / "site/src/content/signals/openai-codex-pr-123.json" + self.write_json( + bundle_path, + { + "schema": "github_change_bundle/v1", + "repo": "openai/codex", + "analysis_mode": "pr_first", + "default_branch": "main", + "primary_pr": { + "number": 123, + "title": "Add useful behavior", + "body": "", + "state": "merged", + "labels": [], + "url": "https://github.com/openai/codex/pull/123", + }, + "commits": [ + { + "sha": "abc1234", + "message": "Add useful behavior", + "url": "https://github.com/openai/codex/commit/abc1234", + "committed_at": "2026-05-13T00:00:00Z", + } + ], + "files": [ + { + 
"path": "codex-rs/core/src/lib.rs", + "status": "modified", + "additions": 1, + "deletions": 0, + } + ], + }, + ) + self.write_json( + analysis_path, + { + "kind": "capability", + "title": "Useful behavior", + "summary": "Adds behavior.", + "why_it_matters": "It helps users.", + "confidence": "confirmed", + "impact": "medium", + "proof_points": ["PR exists."], + }, + ) + self.write_json( + signal_path, + { + "schema": "signal_entry/v1", + "slug": "useful-behavior", + "lane": "github", + "kind": "capability", + "title": "Useful behavior", + "published_at": "2026-05-13T00:00:00Z", + "summary": "Adds behavior.", + "why_it_matters": "It helps users.", + "confidence": "confirmed", + "impact": "medium", + "config_flags": [], + "caveats": [], + "proof_points": ["PR exists."], + "source_refs": { + "repo": "openai/codex", + "pr_url": "https://github.com/openai/codex/pull/123", + "commit_urls": ["https://github.com/openai/codex/commit/abc1234"], + }, + }, + ) + + connection = radar_ledger.connect(root / "radar.sqlite3") + try: + payload = radar_ledger.ingest_existing( + connection, + bundles_dir=root / "artifacts/github/bundles", + analysis_dir=root / "artifacts/github/analysis", + signals_dir=root / "site/src/content/signals", + ) + finally: + connection.close() + + self.assertEqual(payload["upstream_commits"], 1) + self.assertEqual(payload["radar_reviews"], 2) + self.assertEqual(payload["artifact_links"], 4) + self.assertEqual(payload["bundles_ingested"], 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/site/src/components/ReleaseDeltaPanel.astro b/site/src/components/ReleaseDeltaPanel.astro index 6c7dde1e..f62143cc 100644 --- a/site/src/components/ReleaseDeltaPanel.astro +++ b/site/src/components/ReleaseDeltaPanel.astro @@ -37,7 +37,20 @@ const stableOptions = delta.release_options.stable; const previewOptions = delta.release_options.preview; const stableByTag = new Map(stableOptions.map((release) => [release.tag_name, release])); const previewByTag = new 
Map(previewOptions.map((release) => [release.tag_name, release])); -const activeComparison = defaultComparison(delta); +const latestComparison = delta.comparisons.find( + (comparison) => + comparison.stable_tag_name === delta.stable_release.tag_name && + comparison.prerelease_tag_name === delta.prerelease.tag_name, +); +const latestComparisonTrackedCount = latestComparison?.tracked_signal_slugs.length ?? 0; +const activeComparison = + (latestComparison && + trackedSignalsForSlugs(latestComparison.tracked_signal_slugs, trackedSignals).length > 0 + ? latestComparison + : delta.comparisons.find( + (comparison) => + trackedSignalsForSlugs(comparison.tracked_signal_slugs, trackedSignals).length > 0, + )) ?? defaultComparison(delta); function hasConcreteTryPath(signal: SignalCardData): boolean { return Boolean(signal.how_to_try && signal.expected_effect); @@ -129,6 +142,9 @@ const comparisonEntries = delta.comparisons if (!latest || signal.published_at > latest) return signal.published_at; return latest; }, null); + const isLatestPair = + comparison.stable_tag_name === delta.stable_release.tag_name && + comparison.prerelease_tag_name === delta.prerelease.tag_name; return { ...comparison, stable, @@ -136,6 +152,7 @@ const comparisonEntries = delta.comparisons featuredSignals, highlightSignals, latestTrackedAt, + isLatestPair, key: comparisonKey(comparison.stable_tag_name, comparison.prerelease_tag_name), }; }) @@ -209,8 +226,13 @@ const comparisonKeysByStable = Object.fromEntries( {entry.compare.total_commits} commits ahead - {entry.tracked_signal_slugs.length} tracked signals + {entry.featuredSignals.length} curated signals + {!entry.isLatestPair ? ( + + Latest pair {latestComparisonTrackedCount} signals + + ) : null} Last updated {formatPublishedAt(entry.latestTrackedAt ?? entry.preview.published_at)} @@ -272,7 +294,7 @@ const comparisonKeysByStable = Object.fromEntries( ) : (

- No published signals in this version pair currently clear the highlight bar. + No curated public signals in this version pair currently clear the highlight bar.

) } diff --git a/site/src/lib/signal-feed.ts b/site/src/lib/signal-feed.ts index e85befde..c18d8c26 100644 --- a/site/src/lib/signal-feed.ts +++ b/site/src/lib/signal-feed.ts @@ -68,6 +68,9 @@ export type SignalGroup = { items: SignalCardData[]; }; +const DEPRECATED_OR_MIGRATION_PATTERN = + /\b(deprecat|remove|removed|drops?|no longer|legacy|disabled|disable|rollback|rolled back|breaking)\b/i; + export function isFilterId(value: string | null): value is FilterId { return FILTERS.some((filter) => filter.id === value); } @@ -93,6 +96,30 @@ export function sortSignals(signals: SignalCardData[]): SignalCardData[] { ); } +export function isDeprecatedOrMigrationSignal(signal: SignalCardData): boolean { + const searchable = [ + signal.title, + signal.summary, + signal.why_it_matters, + signal.watch_state ?? "", + ...signal.caveats, + ].join("\n"); + return DEPRECATED_OR_MIGRATION_PATTERN.test(searchable); +} + +export function isHomepageSignal(signal: SignalCardData): boolean { + if (signal.impact !== "low") return true; + if (signal.kind === "try_now") return true; + if (signal.how_to_try) return true; + if (signal.config_flags.length > 0) return true; + if (signal.kind === "capability" && signal.confidence === "confirmed") return true; + return isDeprecatedOrMigrationSignal(signal); +} + +export function homepageSignals(signals: SignalCardData[]): SignalCardData[] { + return sortSignals(signals).filter(isHomepageSignal); +} + export function groupSignalsByMonth(signals: SignalCardData[]): SignalGroup[] { const groups = new Map(); diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index 9dfff4a8..fa3f8237 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -12,6 +12,7 @@ import { type ReleaseDeltaData } from "@/lib/release-delta"; import { FILTERS, groupSignalsByMonth, + homepageSignals, isFilterId, sortSignals, type FilterId, @@ -23,12 +24,13 @@ const activeFilter: FilterId = isFilterId(rawFilter) ? 
rawFilter : "all"; const collectionEntries = await getCollection("signals"); const releaseDeltaEntries = await getCollection("releaseDeltas"); const recommendedConfigEntries = await getCollection("recommendedConfigs"); -const signals: SignalCardData[] = sortSignals( +const allSignals: SignalCardData[] = sortSignals( collectionEntries.map((entry) => ({ id: entry.id, ...entry.data, })), ); +const signals: SignalCardData[] = homepageSignals(allSignals); const releaseDelta: ReleaseDeltaData | null = releaseDeltaEntries .map((entry) => entry.data as ReleaseDeltaData) @@ -42,11 +44,11 @@ const groupedSignals = groupSignalsByMonth(signals); const showGroupLabels = groupedSignals.length > 1; const clientFilterIds = FILTERS.map((filter) => filter.id); const tryPathCount = signals.filter((signal) => Boolean(signal.how_to_try)).length; +const archivedSignalCount = Math.max(0, allSignals.length - signals.length); const sourceLinkedCount = signals.filter((signal) => { const refs = signal.source_refs; return Boolean(refs.pr_url || refs.items?.length || refs.commit_urls.length); }).length; -const highImpactCount = signals.filter((signal) => signal.impact === "high").length; const previewSignal: SignalCardData = { id: "preview-shell", @@ -231,8 +233,8 @@ const filterRuntimeScript = `(function () { {sourceLinkedCount}/{signals.length}
  • - High impact - {highImpactCount} + Archive + {archivedSignalCount}
  • @@ -273,6 +275,10 @@ const filterRuntimeScript = `(function () {

    Latest signals

    +

    + {signals.length} community-ready signals from {allSignals.length} analyzed upstream changes. + {archivedSignalCount > 0 ? ` ${archivedSignalCount} low-level watch items stay out of the public feed.` : ""} +

    diff --git a/site/src/styles/global.css b/site/src/styles/global.css index bcd2388b..67d45bf0 100644 --- a/site/src/styles/global.css +++ b/site/src/styles/global.css @@ -1566,6 +1566,13 @@ code { text-transform: uppercase; } +.feed-toolbar__summary { + max-width: 42rem; + color: var(--ink-soft); + font-size: 0.88rem; + line-height: 1.55; +} + .feed-region { margin-top: 1rem; display: flex;