From d4a19dc267fba7317fbb0a80f6138fd9ed5d1969 Mon Sep 17 00:00:00 2001 From: Thor Whalen <1906276+thorwhalen@users.noreply.github.com> Date: Sun, 21 Jun 2026 18:59:53 +0200 Subject: [PATCH] Phase 1: backend core (models, sinks, mountable ingest router, hardening seams) Deterministic, dependency-injected backend spine: - base.py: Pydantic SSOT (Report/ReportSubmission/Identity/Environment/Status/...) - sinks/: Sink protocol + StoreSink (default) + GitHubIssuesSink (injected create_issue, lazy ghapi, image-attachment-aware markdown body) - ingest.py: process_submission core + mountable FastAPI router + make_app - security.py: origin allow-list + in-memory rate-limiter seams - storage.py / config.py: dol-backed stores + HeedConfig - 9 tests (model round-trip, sinks, ingest core + router, origin block); ruff-clean - CI workflow stub (wads uv reusable); publish disabled during design phase Privacy by default: opt-in console/network dropped unless config enables. Refs #2 #3 #4 #5 Claude-Session: https://claude.ai/code/session_01TH3XxQ8qW36Dh3eCVjX1PA --- .github/workflows/ci.yml | 48 +++++++++ heed/__init__.py | 87 +++++++++++++--- heed/base.py | 133 +++++++++++++++++++++++++ heed/config.py | 17 ++++ heed/ingest.py | 208 +++++++++++++++++++++++++++++++++++++++ heed/security.py | 58 +++++++++++ heed/sinks/__init__.py | 38 +++++++ heed/sinks/base.py | 54 ++++++++++ heed/sinks/github.py | 129 ++++++++++++++++++++++++ heed/sinks/store.py | 38 +++++++ heed/storage.py | 26 +++++ pyproject.toml | 5 + tests/test_base.py | 19 ++++ tests/test_ingest.py | 74 ++++++++++++++ tests/test_sinks.py | 41 ++++++++ 15 files changed, 963 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 heed/base.py create mode 100644 heed/config.py create mode 100644 heed/ingest.py create mode 100644 heed/security.py create mode 100644 heed/sinks/__init__.py create mode 100644 heed/sinks/base.py create mode 100644 heed/sinks/github.py create mode 100644 
heed/sinks/store.py create mode 100644 heed/storage.py create mode 100644 tests/test_base.py create mode 100644 tests/test_ingest.py create mode 100644 tests/test_sinks.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..07aa405 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,48 @@ +# wads CI — calls the reusable workflow hosted in i2mint/wads. +# +# All configuration comes from this repo's pyproject.toml [tool.wads.ci.*]. +# To customize the workflow itself (rare), replace this file with the +# full inline template `wads/data/github_ci_uv.yml` from i2mint/wads. +# +# Pinning: `@master` floats with wads. If you need version stability for +# a release-sensitive repo, change `@master` to a wads tag (e.g. `@v0.1.81`). +# CI failure does not block a published release — it blocks the publish +# step itself — so floating master is generally safe. +# +# Permissions: GitHub validates that the caller grants AT LEAST the +# permissions any job in the called workflow requests — at workflow-parse +# time, not at run-time, even if the job would be skipped via `if:`. +# The reusable workflow needs: +# contents: write for the publish job's version-bump push-back +# and for the github-pages job's gh-pages branch push +# pages: write for the github-pages job's REST API Pages config +# Both default to `write` on org-account GITHUB_TOKEN and need to be +# granted explicitly on personal-account callers (where the default is +# read-only). No `id-token: write` needed — the publish-github-pages +# action uses peaceiris/actions-gh-pages (branch-based) + REST API, +# not the OIDC `actions/deploy-pages` flow. 
+name: Continuous Integration +on: [push, pull_request] +jobs: + ci: + uses: i2mint/wads/.github/workflows/uv-ci.yml@master + permissions: + contents: write + pages: write + # Explicit pass-through (not `secrets: inherit`) because `inherit` does + # not reliably propagate caller-repo secrets to a reusable workflow + # owned by a different account (verified empirically: caller in a + # personal account, called in i2mint org → `${{ secrets.PYPI_PASSWORD }}` + # resolved to empty inside the reusable workflow despite the secret + # being set on the caller repo). Listing each secret here makes the + # propagation unambiguous regardless of caller-vs-called ownership. + # Missing secrets on the caller resolve to empty strings, harmless for + # the optional ones; PYPI_PASSWORD must be set for the publish job. + secrets: + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} + KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} + KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} diff --git a/heed/__init__.py b/heed/__init__.py index ec8d0be..dec4820 100644 --- a/heed/__init__.py +++ b/heed/__init__.py @@ -1,20 +1,83 @@ """heed — embeddable, no-install, framework-agnostic end-user feedback. -``heed`` lets any visitor of a deployed web app report a bug or request a feature, -with useful context gathered automatically, routed to a pluggable backend (GitHub -Issues first). It works standalone and integrates with ``enlace`` as an optional -add-on, without depending on it. +``heed`` lets any visitor of a deployed web app report a bug or request a feature, with +useful context gathered automatically, routed to a pluggable backend (GitHub Issues +first). It works standalone and integrates with ``enlace`` as an optional add-on, +without depending on it. -This package is in its **design phase**. 
The competitive landscape, the design -rationale, and the roadmap live in ``misc/docs/`` and in the project's GitHub -issues and discussions. The public API will grow from here; for now this module -exposes only the version. +Phase 1 ships the backend core (the widget is tracked in issue #6). The competitive +landscape and design rationale live in ``misc/docs/``. -See: - - ``misc/docs/research-report.md`` — the deep-research landscape. - - ``misc/docs/design.md`` — architecture, the data model, and the sink interface. +Public API (Phase 1): + Models Report, ReportSubmission, Identity, Environment, Category, Status, + Attachment, LogEntry, NetEntry + Sinks Sink, BaseSink, SinkResult, StoreSink, GitHubIssuesSink, + github_sink_from_token + Backend process_submission, make_router, make_app, HeedConfig + Storage make_report_store, make_attachment_store + +Example: + >>> from heed import StoreSink, ReportSubmission, process_submission + >>> store = {} + >>> report, result = process_submission( + ... ReportSubmission(title="Login button does nothing"), StoreSink(store) + ... 
) + >>> result.ok and report.id in store + True """ +from heed.base import ( + Attachment, + Category, + Environment, + Identity, + LogEntry, + NetEntry, + Report, + ReportSubmission, + Status, + new_report_id, +) +from heed.config import HeedConfig +from heed.ingest import make_app, make_router, process_submission +from heed.sinks import ( + BaseSink, + GitHubIssuesSink, + Sink, + SinkResult, + StoreSink, + github_sink_from_token, +) +from heed.storage import make_attachment_store, make_report_store + __version__ = "0.0.1" -__all__ = ["__version__"] +__all__ = [ + "__version__", + # models + "Report", + "ReportSubmission", + "Identity", + "Environment", + "Category", + "Status", + "Attachment", + "LogEntry", + "NetEntry", + "new_report_id", + # sinks + "Sink", + "BaseSink", + "SinkResult", + "StoreSink", + "GitHubIssuesSink", + "github_sink_from_token", + # backend + "HeedConfig", + "process_submission", + "make_router", + "make_app", + # storage + "make_report_store", + "make_attachment_store", +] diff --git a/heed/base.py b/heed/base.py new file mode 100644 index 0000000..672c968 --- /dev/null +++ b/heed/base.py @@ -0,0 +1,133 @@ +"""Domain model for heed — the single source of truth shared by widget and backend. + +Everything that crosses the widget↔backend boundary, or moves between the ingest +layer and a sink, is one of the Pydantic models defined here. The widget POSTs a +:class:`ReportSubmission` (what an untrusted client may assert); the backend enriches +it into a :class:`Report` (server-assigned id, timestamp, resolved identity, validated +origin, status) before handing it to a sink. 
+""" + +from __future__ import annotations + +from datetime import datetime, timezone +from enum import Enum +from uuid import uuid4 + +from pydantic import BaseModel, Field + + +def new_report_id() -> str: + """Return a fresh opaque report id (uuid4 hex).""" + return uuid4().hex + + +def utcnow() -> datetime: + """Return the current UTC time (an injectable clock seam for tests).""" + return datetime.now(timezone.utc) + + +class Category(str, Enum): + """What kind of feedback a report is.""" + + bug = "bug" + feature = "feature" + question = "question" + other = "other" + + +class Status(str, Enum): + """Lifecycle of a report (flattened; see misc/docs/design.md for the full map).""" + + received = "received" + triaged = "triaged" + planned = "planned" + started = "started" + completed = "completed" + declined = "declined" + duplicate = "duplicate" + + +class Environment(BaseModel): + """Client environment captured by default (no PII beyond the user agent).""" + + user_agent: str | None = None + browser: str | None = None + os: str | None = None + viewport: str | None = Field(default=None, description='e.g. "1280x720"') + locale: str | None = None + device_pixel_ratio: float | None = None + extra: dict[str, str] = Field(default_factory=dict) + + +class LogEntry(BaseModel): + """One captured console entry (opt-in capture only).""" + + level: str + message: str + at: datetime | None = None + source: str | None = None + + +class NetEntry(BaseModel): + """One captured network entry (opt-in; metadata only, never bodies).""" + + method: str + url: str + status: int | None = None + duration_ms: float | None = None + ok: bool | None = None + + +class Attachment(BaseModel): + """A stored binary artifact (e.g. a screenshot), referenced by store key.""" + + kind: str = "screenshot" + media_type: str = "image/png" + ref: str + size: int | None = None + + +class Identity(BaseModel): + """Who submitted the report — anonymous by default. 
+ + ``anon_id`` is an opaque random id (the default). ``user`` is set only when an + authenticated session is present (e.g. via enlace_auth). The client IP is NEVER + stored here — it is used transiently for rate limiting only. + """ + + anon_id: str + user: str | None = None + + +class ReportSubmission(BaseModel): + """The (untrusted) payload a widget POSTs; server fields are assigned later.""" + + category: Category = Category.bug + title: str = Field(min_length=1, max_length=300) + body: str = Field(default="", max_length=20_000) + page_url: str = Field(default="", max_length=2_000) + env: Environment = Field(default_factory=Environment) + console: list[LogEntry] | None = None + network: list[NetEntry] | None = None + # Opaque id the widget may carry across submissions; server still owns identity. + anon_id: str | None = None + + +class Report(BaseModel): + """A server-enriched report — the unit a sink receives.""" + + id: str = Field(default_factory=new_report_id) + created_at: datetime = Field(default_factory=utcnow) + category: Category = Category.bug + title: str + body: str = "" + page_url: str = "" + env: Environment = Field(default_factory=Environment) + attachments: list[Attachment] = Field(default_factory=list) + console: list[LogEntry] | None = None + network: list[NetEntry] | None = None + identity: Identity + origin: str = "" + status: Status = Status.received + labels: list[str] = Field(default_factory=list) + extra: dict[str, str] = Field(default_factory=dict) diff --git a/heed/config.py b/heed/config.py new file mode 100644 index 0000000..5a49838 --- /dev/null +++ b/heed/config.py @@ -0,0 +1,17 @@ +"""Configuration for the heed backend (smart defaults; keyword-only).""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(kw_only=True) +class HeedConfig: + """Knobs for the ingest endpoint. 
All optional with sensible defaults.""" + + allowed_origins: list[str] | None = None # None = allow any (dev) + max_body_bytes: int = 2_000_000 # 2 MB total + max_screenshot_bytes: int = 5_000_000 # 5 MB + rate_limit_per_minute: int = 30 + accept_console: bool = False # opt-in heavy capture (privacy by default) + accept_network: bool = False diff --git a/heed/ingest.py b/heed/ingest.py new file mode 100644 index 0000000..7a66612 --- /dev/null +++ b/heed/ingest.py @@ -0,0 +1,208 @@ +"""The ingest layer — build a Report from a submission and route it to a sink. + +``process_submission`` is the pure, framework-free core (dependency-injected sink, +stores, clock, id generator) so it is unit-testable without a server. ``make_router`` +wraps it in a FastAPI ``APIRouter`` you can run standalone (``make_app``) or +``include_router`` into an enlace app. Identity is anonymous by default; pass a +``user_dependency`` to bind identity when an auth layer (e.g. enlace_auth) is present. + +NOTE: this module does not ``from __future__ import annotations`` — FastAPI resolves +route-handler annotations against module globals, so the request/response types are +imported at module level. 
+""" + +import json +from collections.abc import Callable, MutableMapping +from typing import Any, Optional + +from fastapi import ( + APIRouter, + Depends, + FastAPI, + File, + Form, + HTTPException, + Request, + UploadFile, +) + +from heed.base import ( + Attachment, + Identity, + Report, + ReportSubmission, + Status, + new_report_id, + utcnow, +) +from heed.config import HeedConfig +from heed.security import InMemoryRateLimiter, RateLimiter, origin_allowed +from heed.sinks.base import Sink, SinkResult + + +def process_submission( + submission: ReportSubmission, + sink: Sink, + *, + origin: str = "", + user: Optional[str] = None, + screenshot: Optional[bytes] = None, + attachment_store: Optional[MutableMapping] = None, + config: Optional[HeedConfig] = None, + id_gen: Callable[[], str] = new_report_id, + clock: Callable[[], Any] = utcnow, +) -> tuple[Report, SinkResult]: + """Enrich ``submission`` into a Report, store any screenshot, and route to ``sink``. + + Returns the (report, sink-result) pair. The server owns id/created_at/identity/ + origin/status — never the client. Opt-in capture (console/network) is dropped unless + enabled in ``config``. 
+ """ + config = config or HeedConfig() + report_id = id_gen() + + attachments: list[Attachment] = [] + if screenshot is not None and attachment_store is not None: + key = f"{report_id}.png" + attachment_store[key] = screenshot + attachments.append(Attachment(ref=key, size=len(screenshot))) + + report = Report( + id=report_id, + created_at=clock(), + category=submission.category, + title=submission.title, + body=submission.body, + page_url=submission.page_url, + env=submission.env, + attachments=attachments, + console=submission.console if config.accept_console else None, + network=submission.network if config.accept_network else None, + identity=Identity(anon_id=submission.anon_id or report_id, user=user), + origin=origin, + status=Status.received, + ) + result = sink.submit(report) + return report, result + + +def _build_submission( + *, + title: str, + body: str, + category: str, + page_url: str, + env: str, + anon_id: Optional[str], +) -> ReportSubmission: + env_obj = json.loads(env) if env else {} + return ReportSubmission( + title=title, + body=body, + category=category, + page_url=page_url, + env=env_obj, + anon_id=anon_id, + ) + + +def make_router( + sink: Sink, + *, + config: Optional[HeedConfig] = None, + attachment_store: Optional[MutableMapping] = None, + rate_limiter: Optional[RateLimiter] = None, + user_dependency: Optional[Callable[..., Any]] = None, +) -> APIRouter: + """Build a FastAPI APIRouter exposing ``POST /report``. + + Run standalone via :func:`make_app`, or ``include_router`` it into an enlace app. + Provide ``user_dependency`` (a FastAPI dependency returning a user id or None) to + bind identity when an auth layer like enlace_auth is present. 
+ """ + cfg = config or HeedConfig() + limiter = rate_limiter or InMemoryRateLimiter( + max_per_window=cfg.rate_limit_per_minute + ) + router = APIRouter() + + async def _no_user() -> Optional[str]: + return None + + user_dep = user_dependency or _no_user + + @router.post("/report") + async def submit_report( + request: Request, + title: str = Form(...), + body: str = Form(""), + category: str = Form("bug"), + page_url: str = Form(""), + env: str = Form("{}"), + anon_id: Optional[str] = Form(None), + screenshot: Optional[UploadFile] = File(None), + user: Optional[str] = Depends(user_dep), + ): + origin = request.headers.get("origin", "") + if not origin_allowed(origin or None, cfg.allowed_origins): + raise HTTPException(status_code=403, detail="origin not allowed") + client_key = request.client.host if request.client else "unknown" + if not limiter.allow(client_key): + raise HTTPException(status_code=429, detail="rate limit exceeded") + + try: + submission = _build_submission( + title=title, + body=body, + category=category, + page_url=page_url, + env=env, + anon_id=anon_id, + ) + except Exception as e: + raise HTTPException( + status_code=422, detail=f"invalid submission: {e}" + ) from e + + shot: Optional[bytes] = None + if screenshot is not None: + shot = await screenshot.read() + if len(shot) > cfg.max_screenshot_bytes: + raise HTTPException(status_code=413, detail="screenshot too large") + + report, result = process_submission( + submission, + sink, + origin=origin, + user=user, + screenshot=shot, + attachment_store=attachment_store, + config=cfg, + ) + return { + "id": report.id, + "ok": result.ok, + "ref": result.external_ref, + "url": result.url, + } + + return router + + +def make_app( + sink: Sink, + *, + config: Optional[HeedConfig] = None, + attachment_store: Optional[MutableMapping] = None, + prefix: str = "/heed", + **router_kwargs: Any, +) -> FastAPI: + """A standalone FastAPI app serving the heed router under ``prefix``.""" + app = 
FastAPI(title="heed") + app.include_router( + make_router( + sink, config=config, attachment_store=attachment_store, **router_kwargs + ), + prefix=prefix, + ) + return app diff --git a/heed/security.py b/heed/security.py new file mode 100644 index 0000000..6ad550a --- /dev/null +++ b/heed/security.py @@ -0,0 +1,58 @@ +"""Public-endpoint hardening seams (the ingest endpoint is unauthenticated). + +Phase 1 ships the seams + simple in-memory defaults: an origin allow-list check, a +payload-size guard (enforced in the router), and a sliding-window rate limiter. +Production hardening (Cloudflare Turnstile, a Redis-backed limiter) plugs in behind the +same interfaces — see issue #5. +""" + +from __future__ import annotations + +import time +from collections import defaultdict, deque +from collections.abc import Callable, Iterable +from typing import Protocol + + +def origin_allowed(origin: str | None, allowed: Iterable[str] | None) -> bool: + """True if origin is permitted; ``allowed=None`` allows any (the dev default).""" + if allowed is None: + return True + return origin is not None and origin in set(allowed) + + +class RateLimiter(Protocol): + """Decide whether a request keyed by ``key`` (e.g. client IP) is allowed now.""" + + def allow(self, key: str) -> bool: ... + + +class InMemoryRateLimiter: + """Sliding-window, in-memory limiter. NOT multi-process safe — see issue #5. + + Suitable for a single-worker dev/standalone deployment; swap for a Redis-backed + limiter in production. 
+ """ + + def __init__( + self, + *, + max_per_window: int = 30, + window_seconds: float = 60.0, + clock: Callable[[], float] = time.monotonic, + ): + self.max_per_window = max_per_window + self.window_seconds = window_seconds + self._clock = clock + self._hits: dict[str, deque] = defaultdict(deque) + + def allow(self, key: str) -> bool: + now = self._clock() + hits = self._hits[key] + cutoff = now - self.window_seconds + while hits and hits[0] < cutoff: + hits.popleft() + if len(hits) >= self.max_per_window: + return False + hits.append(now) + return True diff --git a/heed/sinks/__init__.py b/heed/sinks/__init__.py new file mode 100644 index 0000000..6ef6b11 --- /dev/null +++ b/heed/sinks/__init__.py @@ -0,0 +1,38 @@ +"""Sinks: pluggable report destinations (strategy pattern). + +Import the sink you need, or look one up by short name via :func:`get_sink_class`. +""" + +from heed.sinks.base import BaseSink, DupCandidate, Sink, SinkResult +from heed.sinks.github import ( + GitHubIssuesSink, + github_sink_from_token, + render_issue_body, +) +from heed.sinks.store import StoreSink + +_REGISTRY: dict[str, type] = { + "store": StoreSink, + "github": GitHubIssuesSink, +} + + +def get_sink_class(name: str) -> type: + """Return a registered sink class by short name (e.g. 'github', 'store').""" + try: + return _REGISTRY[name] + except KeyError: + raise KeyError(f"Unknown sink {name!r}. Known: {sorted(_REGISTRY)}") from None + + +__all__ = [ + "Sink", + "BaseSink", + "SinkResult", + "DupCandidate", + "StoreSink", + "GitHubIssuesSink", + "github_sink_from_token", + "render_issue_body", + "get_sink_class", +] diff --git a/heed/sinks/base.py b/heed/sinks/base.py new file mode 100644 index 0000000..7323947 --- /dev/null +++ b/heed/sinks/base.py @@ -0,0 +1,54 @@ +"""The sink abstraction — where a report goes (strategy pattern). + +A sink is the only thing that decides what becomes of a report. 
The ingest layer calls +:meth:`Sink.submit`; richer sinks may also offer duplicate detection and +acknowledgement. New destinations are added by writing a sink, never by editing a +dispatcher. +""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + +from pydantic import BaseModel + +from heed.base import Report + + +class SinkResult(BaseModel): + """Outcome of routing a report to a sink.""" + + ok: bool + external_ref: str | None = None # e.g. issue number, store key + url: str | None = None # e.g. issue URL + detail: str | None = None + + +class DupCandidate(BaseModel): + """A possible duplicate of an incoming report.""" + + external_ref: str + score: float = 0.0 + title: str | None = None + + +@runtime_checkable +class Sink(Protocol): + """Anything that can receive a report. ``submit`` is the only required method.""" + + def submit(self, report: Report) -> SinkResult: ... + + +class BaseSink: + """Convenience base giving no-op defaults for the optional sink methods.""" + + def submit(self, report: Report) -> SinkResult: # pragma: no cover - abstract + raise NotImplementedError + + def find_duplicates(self, report: Report) -> list[DupCandidate]: + """Return likely duplicates of ``report`` (default: none).""" + return [] + + def acknowledge(self, report: Report) -> None: + """Hook called after a successful submit (default: no-op).""" + return None diff --git a/heed/sinks/github.py b/heed/sinks/github.py new file mode 100644 index 0000000..c19d667 --- /dev/null +++ b/heed/sinks/github.py @@ -0,0 +1,129 @@ +"""A sink that turns a report into a GitHub issue. + +The issue-creating call is dependency-injected (``create_issue``) so the sink is +testable without network access and so the credential-brokering strategy (a GitHub App +minting short-lived installation tokens, or a server-side token) is pluggable. A +convenience factory builds a ``create_issue`` from a token via ``ghapi`` (lazy import, +the ``heed[github]`` extra). 
+ +Hard platform limit (see misc/docs/design.md): there is no GitHub API to attach an image +to an issue. Resolve a ``screenshot_url`` (committed-file / attachment-store URL) and +embed it in the body; base64 data-URIs do not render. +""" + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from typing import Any + +from heed.base import Category, Report +from heed.sinks.base import BaseSink, SinkResult + +# A callable that creates an issue and returns the GitHub API's issue dict. +CreateIssue = Callable[..., Mapping[str, Any]] + +DEFAULT_LABELS: dict[Category, tuple[str, ...]] = { + Category.bug: ("bug",), + Category.feature: ("enhancement",), + Category.question: ("question",), + Category.other: (), +} + + +class GitHubIssuesSink(BaseSink): + """Route a report to a GitHub issue via an injected ``create_issue`` callable.""" + + def __init__( + self, + create_issue: CreateIssue, + *, + label_map: Mapping[Category, tuple[str, ...]] | None = None, + screenshot_url: Callable[[Report], str | None] | None = None, + ): + self._create_issue = create_issue + self._label_map = dict(label_map or DEFAULT_LABELS) + self._screenshot_url = screenshot_url + + def submit(self, report: Report) -> SinkResult: + labels = list(self._label_map.get(report.category, ())) + list(report.labels) + url = self._screenshot_url(report) if self._screenshot_url else None + body = render_issue_body(report, screenshot_url=url) + issue = self._create_issue(title=report.title, body=body, labels=labels) + number = issue.get("number") + return SinkResult( + ok=True, + external_ref=str(number) if number is not None else None, + url=issue.get("html_url"), + ) + + +def render_issue_body(report: Report, *, screenshot_url: str | None = None) -> str: + """Render a report as a GitHub-flavoured markdown issue body.""" + lines: list[str] = [] + if report.body: + lines += [report.body, ""] + + reporter = report.identity.user or f"anonymous ({report.identity.anon_id[:8]}…)" + 
meta = [f"- **Category**: {report.category.value}"] + if report.page_url: + meta.append(f"- **URL**: {report.page_url}") + if report.env.browser or report.env.os: + env_line = f"- **Env**: {report.env.browser or '?'} on {report.env.os or '?'}" + if report.env.viewport: + env_line += f" ({report.env.viewport})" + meta.append(env_line) + meta.append(f"- **Reporter**: {reporter}") + lines += meta + [""] + + if screenshot_url: + lines += [f"![screenshot]({screenshot_url})", ""] + if report.console: + lines += _collapsible( + "Console", "\n".join(f"[{e.level}] {e.message}" for e in report.console) + ) + if report.network: + lines += _collapsible( + "Network", + "\n".join( + f"{e.method} {e.url} -> {e.status if e.status is not None else '?'}" + for e in report.network + ), + ) + return "\n".join(lines).rstrip() + "\n" + + +def _collapsible(summary: str, content: str) -> list[str]: + return [ + "<details>", + f"<summary>{summary}</summary>", + "", + "```", + content, + "```", + "", + "</details>", + "", + ] + + +def github_sink_from_token(repo: str, token: str, **kwargs) -> GitHubIssuesSink: + """Build a GitHubIssuesSink that creates issues in ``repo`` ('owner/name'). + + Requires the ``heed[github]`` extra. ``token`` is a server-side credential (a GitHub + App installation token or a PAT) — never shipped to the browser. + """ + try: + from ghapi.all import GhApi + except ImportError as e: # pragma: no cover - exercised only without the extra + raise ImportError( + "GitHubIssuesSink via token needs the 'github' extra: " + "pip install 'heed[github]'" + ) from e + + owner, _, name = repo.partition("/") + api = GhApi(owner=owner, repo=name, token=token) + + def create_issue(*, title: str, body: str, labels: list[str]) -> Mapping[str, Any]: + return api.issues.create(title=title, body=body, labels=labels) + + return GitHubIssuesSink(create_issue, **kwargs) diff --git a/heed/sinks/store.py b/heed/sinks/store.py new file mode 100644 index 0000000..8037a87 --- /dev/null +++ b/heed/sinks/store.py @@ -0,0 +1,38 @@ +"""A sink that persists reports into a Mapping (the default, dependency-free sink).""" + +from __future__ import annotations + +from collections.abc import MutableMapping + +from heed.base import Report +from heed.sinks.base import BaseSink, DupCandidate, SinkResult + + +class StoreSink(BaseSink): + """Persist each report as JSON into a dol-style store keyed by report id. + + The zero-configuration default when no external sink (e.g. GitHub) is configured. + ``store`` is any ``MutableMapping[str, str]`` (a plain dict, a ``dol`` file store, + S3, …) — dependency-injected so the sink stays testable. 
+ """ + + def __init__(self, store: MutableMapping[str, str]): + self.store = store + + def submit(self, report: Report) -> SinkResult: + self.store[report.id] = report.model_dump_json() + return SinkResult(ok=True, external_ref=report.id) + + def find_duplicates(self, report: Report) -> list[DupCandidate]: + """Cheap title-equality duplicate scan over stored reports.""" + candidates: list[DupCandidate] = [] + needle = report.title.strip().lower() + for key, raw in self.store.items(): + if key == report.id: + continue + other = Report.model_validate_json(raw) + if other.title.strip().lower() == needle: + candidates.append( + DupCandidate(external_ref=key, score=1.0, title=other.title) + ) + return candidates diff --git a/heed/storage.py b/heed/storage.py new file mode 100644 index 0000000..1f08fdb --- /dev/null +++ b/heed/storage.py @@ -0,0 +1,26 @@ +"""Storage helpers — dol-backed stores for reports and binary attachments. + +Stores are plain ``MutableMapping``s, so the rest of heed never depends on a specific +backend. Defaults are in-memory; pass a directory to persist to the filesystem, or +inject any ``dol`` store (S3, Mongo, …). +""" + +from collections.abc import MutableMapping + + +def make_report_store(rootdir: str | None = None) -> MutableMapping: + """A text store for report JSON. In-memory unless ``rootdir`` is given.""" + if rootdir is None: + return {} + from dol import TextFiles + + return TextFiles(rootdir) + + +def make_attachment_store(rootdir: str | None = None) -> MutableMapping: + """A bytes store for attachments. In-memory unless ``rootdir`` is given.""" + if rootdir is None: + return {} + from dol import Files + + return Files(rootdir) diff --git a/pyproject.toml b/pyproject.toml index e068268..d78c467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,11 @@ ignore = ["D203"] [tool.wads.ci] installer = "uv" +[tool.wads.ci.publish] +# Design phase: do NOT auto-publish to PyPI on main pushes. 
Flip to true for the +# first real release (the PyPI name `heed` is reserved by this repo's intent). +enabled = false + [tool.wads.ci.testing] python_versions = ["3.10", "3.12"] coverage_enabled = false diff --git a/tests/test_base.py b/tests/test_base.py new file mode 100644 index 0000000..45dba36 --- /dev/null +++ b/tests/test_base.py @@ -0,0 +1,19 @@ +"""Tests for the heed domain model.""" + +from heed import Category, Identity, Report, ReportSubmission, Status + + +def test_report_roundtrip(): + """A Report assigns defaults and round-trips through JSON unchanged.""" + r = Report(title="x", identity=Identity(anon_id="a1")) + assert r.id and r.status is Status.received and r.category is Category.bug + r2 = Report.model_validate_json(r.model_dump_json()) + assert r2 == r + + +def test_submission_defaults(): + """A submission defaults to a bug with no opt-in capture.""" + s = ReportSubmission(title="boom") + assert s.category is Category.bug + assert s.body == "" + assert s.console is None and s.network is None diff --git a/tests/test_ingest.py b/tests/test_ingest.py new file mode 100644 index 0000000..3867848 --- /dev/null +++ b/tests/test_ingest.py @@ -0,0 +1,74 @@ +"""Tests for the ingest core and the FastAPI router.""" + +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from heed import ( + HeedConfig, + ReportSubmission, + StoreSink, + make_router, + process_submission, +) + + +def test_process_submission_enriches_and_routes(): + """The core enriches a submission, stores the screenshot, and routes to the sink.""" + store: dict = {} + attachments: dict = {} + sink = StoreSink(store) + sub = ReportSubmission(title="bug!", anon_id="widget-abc") + + report, result = process_submission( + sub, + sink, + origin="https://app.example", + screenshot=b"PNGDATA", + attachment_store=attachments, + ) + + assert result.ok and report.id in store + assert report.identity.anon_id == "widget-abc" + assert report.origin == "https://app.example" + assert 
report.attachments and report.attachments[0].ref in attachments + + +def test_process_submission_drops_optin_capture_by_default(): + """Console/network are dropped unless the config opts in (privacy by default).""" + from heed import LogEntry + + sub = ReportSubmission(title="x", console=[LogEntry(level="error", message="boom")]) + report, _ = process_submission(sub, StoreSink({})) + assert report.console is None # default config does not accept console capture + + +def test_router_post_report(): + """POST /report validates, routes to the sink, and returns the new id.""" + store: dict = {} + app = FastAPI() + app.include_router(make_router(StoreSink(store), config=HeedConfig())) + client = TestClient(app) + + resp = client.post( + "/report", + data={"title": "broken", "category": "bug"}, + headers={"origin": "http://test"}, + ) + assert resp.status_code == 200, resp.text + out = resp.json() + assert out["ok"] and out["id"] in store + + +def test_router_blocks_disallowed_origin(): + """An origin allow-list rejects unknown origins with 403.""" + app = FastAPI() + app.include_router( + make_router( + StoreSink({}), config=HeedConfig(allowed_origins=["https://ok.example"]) + ) + ) + client = TestClient(app) + resp = client.post( + "/report", data={"title": "x"}, headers={"origin": "https://evil.example"} + ) + assert resp.status_code == 403 diff --git a/tests/test_sinks.py b/tests/test_sinks.py new file mode 100644 index 0000000..fdef92d --- /dev/null +++ b/tests/test_sinks.py @@ -0,0 +1,41 @@ +"""Tests for sinks (store + github).""" + +from heed import Category, GitHubIssuesSink, Identity, Report, StoreSink + + +def _report(**kw) -> Report: + kw.setdefault("title", "T") + kw.setdefault("identity", Identity(anon_id="anon12345")) + return Report(**kw) + + +def test_store_sink_persists_and_dedups(): + """StoreSink writes JSON keyed by id and finds same-title duplicates.""" + store: dict = {} + sink = StoreSink(store) + r1 = _report(title="same") + res = sink.submit(r1) + 
assert res.ok and store[r1.id] + + r2 = _report(title="same") + sink.submit(r2) + dups = sink.find_duplicates(r2) + assert any(d.external_ref == r1.id for d in dups) + + +def test_github_sink_uses_injected_create_issue(): + """GitHubIssuesSink delegates to the injected callable and maps category->labels.""" + calls: dict = {} + + def fake_create_issue(*, title, body, labels): + calls.update(title=title, body=body, labels=labels) + return {"number": 42, "html_url": "https://github.com/x/y/issues/42"} + + sink = GitHubIssuesSink(fake_create_issue) + r = _report(title="Login broken", category=Category.bug, body="it broke") + res = sink.submit(r) + + assert res.ok and res.external_ref == "42" and res.url.endswith("/42") + assert calls["title"] == "Login broken" + assert "bug" in calls["labels"] + assert "it broke" in calls["body"]