diff --git a/.github/workflows/blog-autopublish.yml b/.github/workflows/blog-autopublish.yml new file mode 100644 index 0000000..501cce9 --- /dev/null +++ b/.github/workflows/blog-autopublish.yml @@ -0,0 +1,178 @@ +name: Blog auto-publish + +# Runs daily at 06:00 UTC. Scans content/blog/ for drafts; for any +# post with `date <= today`, `draft = true`, and `hold` not set, it +# opens an auto-merge PR that flips draft to false. Each publish +# becomes its own PR/squash-merge so deploy fires per-post and the +# audit trail is clean. +# +# Always posts a status report to a pinned issue (label +# `autopublish-status`). The report carries a hidden bot-marker string +# so the same comment is replaced each run instead of appended. +# +# Failure path opens a separate issue tagged `autopublish-failure`. +# +# Manual trigger via workflow_dispatch is supported for first-run / +# debugging. Concurrency is serialized so two runs cannot interleave +# (e.g. cron + manual). +# +# Security note: this workflow has only `schedule` and +# `workflow_dispatch` triggers — neither carries untrusted user input. +# All values flowing into `run:` blocks come from server-controlled +# context (github.run_id, github.repository) or from our own script +# reading our own repository's frontmatter, so no command-injection +# surface exists from external actors. 
on:
  schedule:
    - cron: "0 6 * * *"
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write
  issues: write

# Serialize runs (cron + manual) instead of cancelling: a run that is
# half-way through pushing/merging must be allowed to finish.
concurrency:
  group: blog-autopublish
  cancel-in-progress: false

jobs:
  publish:
    name: Scan, publish, report
    runs-on: ubuntu-latest
    timeout-minutes: 15
    env:
      RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      REPO: ${{ github.repository }}
      # Hidden marker that identifies the bot's status comment. It must be
      # non-empty: an empty needle matches every comment on the issue.
      STATUS_MARKER: "<!-- blog-autopublish-status -->"
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so branches can be created from / fast-forwarded to main.
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Configure git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

      - name: Ensure labels exist
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Later steps pass these labels to `gh ... --label`, which is expected
          # to reject labels the repository does not have yet (first run).
          # Creating a label that already exists fails harmlessly.
          for label in autopublish autopublish-status autopublish-failure; do
            gh label create "$label" 2>/dev/null || true
          done

      - name: Scan drafts
        id: scan
        run: |
          set -euo pipefail
          python3 scripts/blog-autopublish.py --mode scan > /tmp/inventory.json
          ready_count=$(jq '.ready | length' /tmp/inventory.json)
          scheduled_count=$(jq '.scheduled | length' /tmp/inventory.json)
          held_count=$(jq '.held | length' /tmp/inventory.json)
          echo "ready_count=${ready_count}" >> "$GITHUB_OUTPUT"
          echo "scheduled_count=${scheduled_count}" >> "$GITHUB_OUTPUT"
          echo "held_count=${held_count}" >> "$GITHUB_OUTPUT"
          {
            echo "## Inventory"
            echo '```json'
            cat /tmp/inventory.json
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Publish ready posts
        id: publish
        if: steps.scan.outputs.ready_count != '0'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          # NOTE(review): pushes, PRs and merges made with GITHUB_TOKEN do not
          # start other workflows' push / pull_request runs. If deploy or the
          # required checks live in separate workflows, confirm they fire
          # (a PAT or GitHub App token may be needed); otherwise the `--auto`
          # fallback below can wait forever.
          published='[]'
          # Written up front and after every post, so a failure half-way
          # through the loop still reports what was already published.
          echo "$published" > /tmp/published.json
          today=$(date -u +%Y-%m-%d)

          # Iterate each ready post, one PR per post.
          while IFS=$'\t' read -r path slug; do
            branch="cron/publish-${slug}-${today}"

            # Clear any stale branch from a previous failed run.
            git branch -D "$branch" 2>/dev/null || true
            git push origin --delete "$branch" 2>/dev/null || true

            git checkout -b "$branch"
            python3 scripts/blog-autopublish.py --mode flip --file "$path"
            git add "$path"
            git commit -m "publish: ${slug}"
            git push -u origin "$branch"

            pr_url=$(gh pr create \
              --base main \
              --head "$branch" \
              --title "publish: ${slug}" \
              --body "Automated publish for \`${slug}\` triggered by date \`${today}\`. Opened by the blog-autopublish workflow." \
              --label autopublish)
            pr_num=$(echo "$pr_url" | grep -oE '[0-9]+$')

            # Squash-merge directly. If branch protections require checks,
            # fall back to --auto so the merge happens once CI passes.
            gh pr merge "$pr_num" --squash --delete-branch || \
              gh pr merge "$pr_num" --auto --squash --delete-branch

            published=$(echo "$published" | jq -c --arg s "$slug" --arg p "$pr_num" '. + [{slug: $s, pr: $p}]')
            echo "$published" > /tmp/published.json

            # Return to an up-to-date main so the next branch includes this merge.
            git checkout main
            git pull --ff-only origin main
          done < <(jq -r '.ready[] | [.path, .slug] | @tsv' /tmp/inventory.json)

      - name: Render status report
        if: always()
        run: |
          set -euo pipefail
          if [ ! -s /tmp/inventory.json ]; then
            # The scan step failed before writing an inventory. Emit a short,
            # truthful report instead of crashing and leaving an empty file
            # for the next step to post.
            {
              echo "## Blog auto-publish"
              echo
              echo "The scan step did not produce an inventory, so no report could be rendered."
              echo
              echo "_Run: ${RUN_URL}_"
            } > /tmp/report.md
          else
            published=$(cat /tmp/published.json 2>/dev/null || echo '[]')
            python3 scripts/blog-autopublish.py \
              --mode report \
              --inventory /tmp/inventory.json \
              --published "$published" \
              --run-url "$RUN_URL" > /tmp/report.md
          fi
          cat /tmp/report.md >> "$GITHUB_STEP_SUMMARY"

      - name: Post / update status comment
        if: always()
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          # Make sure the report carries the marker, whatever rendered it, so
          # the lookup below can find this comment again on the next run.
          if ! grep -qF -- "$STATUS_MARKER" /tmp/report.md; then
            { printf '%s\n' "$STATUS_MARKER"; cat /tmp/report.md; } > /tmp/report.marked.md
            mv /tmp/report.marked.md /tmp/report.md
          fi

          # Locate the pinned status issue by label, or create it on first run.
          issue_num=$(gh issue list --label autopublish-status --state open --json number --jq '.[0].number // empty')
          if [ -z "$issue_num" ]; then
            issue_url=$(gh issue create \
              --title "Blog auto-publish status" \
              --label autopublish-status \
              --body-file /tmp/report.md)
            issue_num=$(echo "$issue_url" | grep -oE '[0-9]+$')
            echo "Created status issue #${issue_num}"
            exit 0
          fi

          # Find the previous bot comment by its hidden marker.
          # `sed -n 1p` drains its input, unlike `head -1`, so the upstream
          # commands cannot die of SIGPIPE and trip `pipefail`.
          comment_id=$(gh api "repos/${REPO}/issues/${issue_num}/comments" --paginate \
            | jq -r --arg marker "$STATUS_MARKER" '.[] | select(.body | contains($marker)) | .id' \
            | sed -n '1p')

          if [ -n "$comment_id" ]; then
            gh api "repos/${REPO}/issues/comments/${comment_id}" \
              -X PATCH \
              --field body=@/tmp/report.md
            echo "Updated comment ${comment_id} on issue #${issue_num}"
          else
            gh issue comment "$issue_num" --body-file /tmp/report.md
            echo "Created new comment on issue #${issue_num}"
          fi

      - name: Open failure issue
        if: failure()
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          today=$(date -u +%Y-%m-%d)
          gh issue create \
            --title "blog-autopublish failed: ${today}" \
            --label autopublish-failure \
            --body "The daily blog-autopublish workflow failed on ${today}. Logs: ${RUN_URL}"
+ + --mode report --inventory PATH --published JSON [--run-url URL] + Render a markdown status report (with a `` + bot marker) suitable for posting on the pinned status issue. + +The frontmatter parser is intentionally regex-based, not full-TOML — it +only reads the four fields that drive the cron (date, draft, hold, title) +and does not need to interpret arrays or tables. This avoids a dependency +on `tomllib` and keeps the script readable. +""" + +import argparse +import datetime +import json +import pathlib +import re +import sys + +BLOG_DIR = pathlib.Path("content/blog") + +FRONTMATTER_RE = re.compile(r"\A\+\+\+\n(.*?)\n\+\+\+", re.S) +DATE_RE = re.compile(r"^date\s*=\s*(\d{4}-\d{2}-\d{2})\s*$", re.M) +DRAFT_RE = re.compile(r"^draft\s*=\s*(true|false)\s*$", re.M) +HOLD_RE = re.compile(r"^hold\s*=\s*(true|false)\s*$", re.M) +TITLE_RE = re.compile(r'^title\s*=\s*"([^"]*)"\s*$', re.M) +DRAFT_TRUE_LINE = re.compile(r"^draft\s*=\s*true\s*$", re.M) + + +def parse_post(path: pathlib.Path) -> dict | None: + """Return a record for a post, or None if it has no parseable frontmatter.""" + text = path.read_text(encoding="utf-8") + fm_match = FRONTMATTER_RE.match(text) + if not fm_match: + return None + fm = fm_match.group(1) + date_m = DATE_RE.search(fm) + draft_m = DRAFT_RE.search(fm) + hold_m = HOLD_RE.search(fm) + title_m = TITLE_RE.search(fm) + return { + "path": str(path), + "slug": path.stem.split("-", 3)[-1] if path.stem[:10].count("-") == 2 else path.stem, + "title": title_m.group(1) if title_m else path.stem, + "date": date_m.group(1) if date_m else None, + "draft": bool(draft_m and draft_m.group(1) == "true"), + "hold": bool(hold_m and hold_m.group(1) == "true"), + } + + +def scan(today: str) -> dict: + drafts = [] + for path in sorted(BLOG_DIR.glob("*.md")): + if path.name.startswith("_"): + continue + record = parse_post(path) + if record is None: + continue + if record["draft"]: + drafts.append(record) + held = [p for p in drafts if p["hold"]] + active = [p 
for p in drafts if not p["hold"]] + ready = [p for p in active if p["date"] and p["date"] <= today] + scheduled = [p for p in active if p["date"] and p["date"] > today] + return { + "today": today, + "ready": ready, + "scheduled": sorted(scheduled, key=lambda p: p["date"]), + "held": sorted(held, key=lambda p: p["slug"]), + } + + +def flip(path_str: str) -> None: + path = pathlib.Path(path_str) + text = path.read_text(encoding="utf-8") + new_text, count = DRAFT_TRUE_LINE.subn("draft = false", text, count=1) + if count == 0: + sys.exit(f"flip: no `draft = true` line found in {path_str}") + path.write_text(new_text, encoding="utf-8") + + +def render_report(inventory: dict, published: list[dict], run_url: str) -> str: + today = inventory["today"] + lines = [ + "", + f"## Blog auto-publish · last run {today}", + "", + ] + if published: + lines.append(f"**This run:** published {len(published)} —") + for entry in published: + slug = entry["slug"] + pr = entry.get("pr") + pr_suffix = f" (PR #{pr})" if pr else "" + lines.append(f"- `{slug}`{pr_suffix}") + else: + lines.append("**This run:** no posts ready to publish today.") + lines.append("") + + scheduled = inventory["scheduled"] + lines.append(f"**Scheduled ({len(scheduled)}):**") + if scheduled: + for p in scheduled: + lines.append(f"- {p['date']} · `{p['slug']}` — {p['title']}") + else: + lines.append("- _none_") + lines.append("") + + held = inventory["held"] + lines.append(f"**Held ({len(held)}):**") + if held: + for p in held: + lines.append(f"- ∞ · `{p['slug']}` — {p['title']}") + else: + lines.append("- _none_") + lines.append("") + + if run_url: + lines.append(f"_Run: {run_url}_") + return "\n".join(lines) + "\n" + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--mode", choices=["scan", "flip", "report"], required=True) + parser.add_argument("--file", help="Path to the post (--mode flip)") + parser.add_argument("--inventory", help="Path to inventory JSON 
(--mode report)") + parser.add_argument( + "--published", + default="[]", + help="JSON array of {slug, pr} for posts published this run (--mode report)", + ) + parser.add_argument("--run-url", default="", help="GitHub Actions run URL (--mode report)") + args = parser.parse_args() + + if args.mode == "scan": + today = datetime.date.today().isoformat() + json.dump(scan(today), sys.stdout, indent=2) + sys.stdout.write("\n") + elif args.mode == "flip": + if not args.file: + sys.exit("--file is required for --mode flip") + flip(args.file) + elif args.mode == "report": + if not args.inventory: + sys.exit("--inventory is required for --mode report") + with open(args.inventory, encoding="utf-8") as f: + inventory = json.load(f) + published = json.loads(args.published) + sys.stdout.write(render_report(inventory, published, args.run_url)) + + +if __name__ == "__main__": + main()