Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions .github/workflows/blog-autopublish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
name: Blog auto-publish

# Runs daily at 06:00 UTC. Scans content/blog/ for drafts; for any
# post with `date <= today`, `draft = true`, and `hold` not set, it
# opens an auto-merge PR that flips draft to false. Each publish
# becomes its own PR/squash-merge so deploy fires per-post and the
# audit trail is clean.
#
# NOTE(review): the PRs and merges below are made with GITHUB_TOKEN.
# GitHub does not start new workflow runs for events caused by
# GITHUB_TOKEN (other than workflow_dispatch / repository_dispatch), so
# if deploy or the required checks are push/pull_request-triggered
# workflows in this repository they will not fire from these merges.
# Confirm how deploy is triggered; switch to a PAT or GitHub App token
# if it relies on those events.
#
# Always posts a status report to a pinned issue (label
# `autopublish-status`) using a bot-marker (`<!-- blog-cron-status -->`)
# so the same comment is replaced each run instead of appended.
#
# Failure path opens a separate issue tagged `autopublish-failure`.
#
# Manual trigger via workflow_dispatch is supported for first-run /
# debugging. Concurrency is serialized so two runs cannot interleave
# (e.g. cron + manual). The job always checks out BASE_BRANCH, so a
# manual run started from another branch cannot leak that branch's
# commits into a publish PR.
#
# Security note: this workflow has only `schedule` and
# `workflow_dispatch` triggers — neither carries untrusted user input.
# All values flowing into `run:` blocks come from server-controlled
# context (github.run_id, github.repository) or from our own script
# reading our own repository's frontmatter, so no command-injection
# surface exists from external actors.

on:
  schedule:
    - cron: "0 6 * * *"
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write
  issues: write

concurrency:
  group: blog-autopublish
  cancel-in-progress: false

jobs:
  publish:
    name: Scan, publish, report
    runs-on: ubuntu-latest
    timeout-minutes: 15
    env:
      RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      REPO: ${{ github.repository }}
      # Branch that publish PRs target and that every publish branch
      # is cut from.
      BASE_BRANCH: main
    steps:
      - uses: actions/checkout@v4
        with:
          # Pin the ref: a workflow_dispatch run may be started from any
          # branch, but publish branches must be cut from BASE_BRANCH.
          ref: ${{ env.BASE_BRANCH }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Configure git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

      - name: Ensure labels exist
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # `gh pr create` / `gh issue create` reject labels that do not
          # exist yet, which would break the very first run. Creation is
          # best-effort: an "already exists" error is expected on every
          # later run, and any real problem resurfaces in the step that
          # actually uses the label.
          for label in autopublish autopublish-status autopublish-failure; do
            gh label create "$label" \
              --description "Managed by the blog-autopublish workflow" \
              2>/dev/null || true
          done

      - name: Scan drafts
        id: scan
        run: |
          set -euo pipefail
          python3 scripts/blog-autopublish.py --mode scan > /tmp/inventory.json
          ready_count=$(jq '.ready | length' /tmp/inventory.json)
          scheduled_count=$(jq '.scheduled | length' /tmp/inventory.json)
          held_count=$(jq '.held | length' /tmp/inventory.json)
          echo "ready_count=${ready_count}" >> "$GITHUB_OUTPUT"
          echo "scheduled_count=${scheduled_count}" >> "$GITHUB_OUTPUT"
          echo "held_count=${held_count}" >> "$GITHUB_OUTPUT"
          {
            echo "## Inventory"
            echo '```json'
            cat /tmp/inventory.json
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Publish ready posts
        id: publish
        if: steps.scan.outputs.ready_count != '0'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          published='[]'
          # Seed the file up front and refresh it after every post so the
          # report step still sees partial progress if a later post fails.
          echo "$published" > /tmp/published.json
          today=$(date -u +%Y-%m-%d)

          # Iterate each ready post, one PR per post. The list is read on
          # fd 3 so that git/gh inside the loop cannot consume it via stdin.
          while IFS=$'\t' read -r -u 3 path slug; do
            branch="cron/publish-${slug}-${today}"

            # Clear any stale branch from a previous failed run.
            git branch -D "$branch" 2>/dev/null || true
            git push origin --delete "$branch" 2>/dev/null || true

            git checkout -b "$branch"
            python3 scripts/blog-autopublish.py --mode flip --file "$path"
            git add "$path"
            git commit -m "publish: ${slug}"
            git push -u origin "$branch"

            pr_url=$(gh pr create \
              --base "$BASE_BRANCH" \
              --head "$branch" \
              --title "publish: ${slug}" \
              --body "Automated publish for \`${slug}\` triggered by date \`${today}\`. Opened by the blog-autopublish workflow." \
              --label autopublish)
            pr_num=$(echo "$pr_url" | grep -oE '[0-9]+$')

            # Squash-merge directly. If branch protections require checks,
            # fall back to --auto so the merge happens once CI passes.
            gh pr merge "$pr_num" --squash --delete-branch || \
              gh pr merge "$pr_num" --auto --squash --delete-branch

            published=$(echo "$published" | jq --arg s "$slug" --arg p "$pr_num" '. + [{slug: $s, pr: $p}]')
            echo "$published" > /tmp/published.json

            git checkout "$BASE_BRANCH"
            git pull --ff-only origin "$BASE_BRANCH"
          done 3< <(jq -r '.ready[] | [.path, .slug] | @tsv' /tmp/inventory.json)

      - name: Render status report
        id: report
        # Needs the inventory, so skip when the scan itself failed; the
        # failure issue below still records that run.
        if: always() && steps.scan.outcome == 'success'
        run: |
          set -euo pipefail
          published=$(cat /tmp/published.json 2>/dev/null || echo '[]')
          python3 scripts/blog-autopublish.py \
            --mode report \
            --inventory /tmp/inventory.json \
            --published "$published" \
            --run-url "$RUN_URL" > /tmp/report.md
          cat /tmp/report.md >> "$GITHUB_STEP_SUMMARY"

      - name: Post / update status comment
        # Only post when a report was actually rendered.
        if: always() && steps.report.outcome == 'success'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          # Locate the pinned status issue by label, or create it on first run.
          issue_num=$(gh issue list --label autopublish-status --state open --json number --jq '.[0].number // empty')
          if [ -z "$issue_num" ]; then
            issue_url=$(gh issue create \
              --title "Blog auto-publish status" \
              --label autopublish-status \
              --body-file /tmp/report.md)
            issue_num=$(echo "$issue_url" | grep -oE '[0-9]+$')
            echo "Created status issue #${issue_num}"
            exit 0
          fi

          # Find the previous bot comment by its hidden marker. `sed -n 1p`
          # drains the whole stream, unlike `head -1`, which can close the
          # pipe early and fail the step with SIGPIPE under pipefail.
          comment_id=$(gh api "repos/${REPO}/issues/${issue_num}/comments" --paginate \
            --jq '.[] | select(.body | contains("<!-- blog-cron-status -->")) | .id' | sed -n '1p')

          if [ -n "$comment_id" ]; then
            gh api "repos/${REPO}/issues/comments/${comment_id}" \
              -X PATCH \
              --field body=@/tmp/report.md
            echo "Updated comment ${comment_id} on issue #${issue_num}"
          else
            gh issue comment "$issue_num" --body-file /tmp/report.md
            echo "Created new comment on issue #${issue_num}"
          fi

      - name: Open failure issue
        if: failure()
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          today=$(date -u +%Y-%m-%d)
          gh issue create \
            --title "blog-autopublish failed: ${today}" \
            --label autopublish-failure \
            --body "The daily blog-autopublish workflow failed on ${today}. Logs: ${RUN_URL}"
167 changes: 167 additions & 0 deletions scripts/blog-autopublish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""Blog auto-publish: scan content/blog/ drafts and categorize them by schedule.

Three modes:

--mode scan
Print a JSON inventory to stdout: {today, ready, scheduled, held}.
A draft is "ready" when date <= today and hold is not true.
A draft is "scheduled" when date > today (and hold is not true).
A draft is "held" when hold = true (regardless of date).

--mode flip --file PATH
Atomically replace the first `draft = true` line in PATH with
`draft = false`. Used by the workflow once a post moves to ready.

--mode report --inventory PATH --published JSON [--run-url URL]
Render a markdown status report (with a `<!-- blog-cron-status -->`
bot marker) suitable for posting on the pinned status issue.

The frontmatter parser is intentionally regex-based, not full-TOML — it
only reads the four fields that drive the cron (date, draft, hold, title)
and does not need to interpret arrays or tables. This avoids a dependency
on `tomllib` and keeps the script readable.
"""

import argparse
import datetime
import json
import pathlib
import re
import sys

BLOG_DIR = pathlib.Path("content/blog")

FRONTMATTER_RE = re.compile(r"\A\+\+\+\n(.*?)\n\+\+\+", re.S)
DATE_RE = re.compile(r"^date\s*=\s*(\d{4}-\d{2}-\d{2})\s*$", re.M)
DRAFT_RE = re.compile(r"^draft\s*=\s*(true|false)\s*$", re.M)
HOLD_RE = re.compile(r"^hold\s*=\s*(true|false)\s*$", re.M)
TITLE_RE = re.compile(r'^title\s*=\s*"([^"]*)"\s*$', re.M)
DRAFT_TRUE_LINE = re.compile(r"^draft\s*=\s*true\s*$", re.M)


def parse_post(path: pathlib.Path) -> dict | None:
"""Return a record for a post, or None if it has no parseable frontmatter."""
text = path.read_text(encoding="utf-8")
fm_match = FRONTMATTER_RE.match(text)
if not fm_match:
return None
fm = fm_match.group(1)
date_m = DATE_RE.search(fm)
draft_m = DRAFT_RE.search(fm)
hold_m = HOLD_RE.search(fm)
title_m = TITLE_RE.search(fm)
return {
"path": str(path),
"slug": path.stem.split("-", 3)[-1] if path.stem[:10].count("-") == 2 else path.stem,
"title": title_m.group(1) if title_m else path.stem,
"date": date_m.group(1) if date_m else None,
"draft": bool(draft_m and draft_m.group(1) == "true"),
"hold": bool(hold_m and hold_m.group(1) == "true"),
}


def scan(today: str) -> dict:
    """Bucket every draft under BLOG_DIR into ready / scheduled / held.

    Files whose name starts with ``_`` and files for which ``parse_post``
    returns None are skipped, as are posts that are not drafts. A held draft
    always lands in ``held``; otherwise its ``date`` string is compared with
    ``today`` (both ``YYYY-MM-DD``, so string order is date order). Drafts
    that are not held and have no date appear in none of the buckets.

    Returns a dict with ``today``, ``ready`` (in file-name order),
    ``scheduled`` (ordered by date) and ``held`` (ordered by slug).
    """
    ready_posts = []
    scheduled_posts = []
    held_posts = []

    for post_path in sorted(BLOG_DIR.glob("*.md")):
        if post_path.name.startswith("_"):
            continue
        post = parse_post(post_path)
        if post is None or not post["draft"]:
            continue

        if post["hold"]:
            held_posts.append(post)
        elif not post["date"]:
            # Undated, un-held drafts are deliberately left out of every bucket.
            continue
        elif post["date"] <= today:
            ready_posts.append(post)
        else:
            scheduled_posts.append(post)

    scheduled_posts.sort(key=lambda post: post["date"])
    held_posts.sort(key=lambda post: post["slug"])
    return {
        "today": today,
        "ready": ready_posts,
        "scheduled": scheduled_posts,
        "held": held_posts,
    }


def flip(path_str: str) -> None:
    """Atomically replace the first `draft = true` line in a post.

    The new content is written to a temporary file in the same directory and
    then moved over the original with ``os.replace``, so a crash mid-write
    cannot leave a truncated post behind. The original file's permission
    bits are carried over to the replacement.

    Exits the process with an error message when the file contains no
    `draft = true` line.
    """
    import os
    import stat
    import tempfile

    path = pathlib.Path(path_str)
    text = path.read_text(encoding="utf-8")
    new_text, count = DRAFT_TRUE_LINE.subn("draft = false", text, count=1)
    if count == 0:
        sys.exit(f"flip: no `draft = true` line found in {path_str}")

    # Resolve symlinks first so the link's target is rewritten, not the link.
    target = os.path.realpath(path)
    fd, tmp_name = tempfile.mkstemp(
        dir=os.path.dirname(target),
        prefix=f".{os.path.basename(target)}.",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as tmp:
            tmp.write(new_text)
        # mkstemp creates the file owner-only; restore the original mode.
        os.chmod(tmp_name, stat.S_IMODE(os.stat(target).st_mode))
        os.replace(tmp_name, target)
    except BaseException:
        # Do not leave the temporary file behind on any failure.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise


def render_report(inventory: dict, published: list[dict], run_url: str) -> str:
    """Render the markdown status report for one run.

    Args:
        inventory: The dict produced by ``--mode scan``; ``today``,
            ``scheduled`` and ``held`` are required, ``ready`` is optional.
        published: ``{slug, pr}`` entries for posts published this run.
        run_url: Link to the run; the trailing run line is omitted when empty.

    Returns:
        The report text, starting with the `<!-- blog-cron-status -->` marker
        and ending with a newline.

    Posts listed as ready in the inventory but absent from ``published`` are
    reported in their own section instead of being described as "no posts
    ready", which is what a failed publish step used to look like. When every
    ready post was published (or none was ready) the output is unchanged.
    """
    today = inventory["today"]
    lines = [
        "<!-- blog-cron-status -->",
        f"## Blog auto-publish · last run {today}",
        "",
    ]

    # Ready posts that never made it into `published` (publish step failed
    # or stopped part-way through).
    published_slugs = {entry["slug"] for entry in published}
    pending = [p for p in inventory.get("ready") or [] if p["slug"] not in published_slugs]

    if published:
        lines.append(f"**This run:** published {len(published)} —")
        for entry in published:
            slug = entry["slug"]
            pr = entry.get("pr")
            pr_suffix = f" (PR #{pr})" if pr else ""
            lines.append(f"- `{slug}`{pr_suffix}")
    elif pending:
        lines.append("**This run:** nothing was published.")
    else:
        lines.append("**This run:** no posts ready to publish today.")
    lines.append("")

    if pending:
        lines.append(f"**Ready but not published ({len(pending)}):**")
        for p in pending:
            lines.append(f"- {p['date']} · `{p['slug']}` — {p['title']}")
        lines.append("")

    scheduled = inventory["scheduled"]
    lines.append(f"**Scheduled ({len(scheduled)}):**")
    if scheduled:
        for p in scheduled:
            lines.append(f"- {p['date']} · `{p['slug']}` — {p['title']}")
    else:
        lines.append("- _none_")
    lines.append("")

    held = inventory["held"]
    lines.append(f"**Held ({len(held)}):**")
    if held:
        for p in held:
            lines.append(f"- ∞ · `{p['slug']}` — {p['title']}")
    else:
        lines.append("- _none_")
    lines.append("")

    if run_url:
        lines.append(f"_Run: {run_url}_")
    return "\n".join(lines) + "\n"


def main() -> None:
    """Parse the command line and dispatch to the selected mode.

    ``scan`` prints the inventory JSON to stdout, ``flip`` rewrites the file
    named by ``--file``, and ``report`` prints the markdown report built from
    ``--inventory`` and ``--published``. A missing mode-specific argument
    exits the process with an error message.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--mode", choices=["scan", "flip", "report"], required=True)
    parser.add_argument("--file", help="Path to the post (--mode flip)")
    parser.add_argument("--inventory", help="Path to inventory JSON (--mode report)")
    parser.add_argument(
        "--published",
        default="[]",
        help="JSON array of {slug, pr} for posts published this run (--mode report)",
    )
    parser.add_argument("--run-url", default="", help="GitHub Actions run URL (--mode report)")
    args = parser.parse_args()

    if args.mode == "scan":
        # Use the UTC calendar date explicitly: the workflow schedules and
        # names branches in UTC (`date -u`), whereas date.today() follows the
        # machine's local timezone and could disagree near midnight.
        today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
        json.dump(scan(today), sys.stdout, indent=2)
        sys.stdout.write("\n")
    elif args.mode == "flip":
        if not args.file:
            sys.exit("--file is required for --mode flip")
        flip(args.file)
    elif args.mode == "report":
        if not args.inventory:
            sys.exit("--inventory is required for --mode report")
        with open(args.inventory, encoding="utf-8") as f:
            inventory = json.load(f)
        published = json.loads(args.published)
        sys.stdout.write(render_report(inventory, published, args.run_url))


if __name__ == "__main__":
    main()
Loading