From aac3d832a323fe557c178534b54372f784fd5974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:17:19 +0200
Subject: [PATCH 01/11] [#34] add MediaPhotoDescribed variant and discriminator
 updates

Introduce a fifth media variant carrying the vision-LLM output:
is_decorative + description + description_lang + description_version +
described_at, on top of the downloaded-state fields. The discriminated
union grows from four to five `kind`s; the path-traversal validator
carries over verbatim, since the on-disk bytes are inherited from the
prior downloaded state.

Update every isinstance cascade that walked the union (media.py
eligibility, generate.py rendering + mirror, diff.py counting) to
handle the new variant explicitly so `assert_never` keeps the
exhaustiveness check honest. The diff report grows a `described`
column and a `delta_described` field; the count is exposed through
both the text and JSON renderers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/xbrain/diff.py     |  30 ++++++++---
 src/xbrain/generate.py |  21 +++++---
 src/xbrain/media.py    |  43 ++++++++++++---
 src/xbrain/models.py   |  81 ++++++++++++++++++++++++++--
 tests/test_models.py   | 116 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 264 insertions(+), 27 deletions(-)

diff --git a/src/xbrain/diff.py b/src/xbrain/diff.py
index f41715d..3774097 100644
--- a/src/xbrain/diff.py
+++ b/src/xbrain/diff.py
@@ -32,6 +32,7 @@
 
 from xbrain.models import (
     Item,
+    MediaPhotoDescribed,
     MediaPhotoDownloaded,
     MediaPhotoFailed,
     MediaPhotoPending,
@@ -132,13 +133,15 @@ class VocabDiff(BaseModel):
 class MediaStateCounts(BaseModel):
     """Counts of media variants on one side of a diff.
 
-    Mirrors the four-variant union: downloaded / pending / failed
-    / video_pending. Counts are global across all items in the store —
-    per-item resolution lives on the items themselves (already
-    diff-able via the items round-trip).
+    Mirrors the five-variant union: downloaded / described / pending /
+    failed / video_pending. Counts are global across all items in the
+    store — per-item resolution lives on the items themselves (already
+    diff-able via the items round-trip). `described` is the terminal
+    state for content-bearing photos after a vision-describe pass.
     """
 
     downloaded: int = 0
+    described: int = 0
     pending: int = 0
     failed: int = 0
     video_pending: int = 0
@@ -149,14 +152,18 @@ class MediaDiff(BaseModel):
 
     `delta_downloaded = b.downloaded - a.downloaded` answers the
     operator's first question after `xbrain media`: "how many new photos
-    landed?" Same for the other three variants. Negative deltas are
-    legal — `xbrain media --force` can move a previously-downloaded
-    photo back to `failed` if the URL turned 404 in the meantime.
+    landed?" `delta_described` answers the analogous question after
+    `xbrain describe`: "how many photos transitioned from downloaded to
+    described?" Negative deltas are legal — `--force` runs can move
+    photos backwards along the pipeline (e.g. a `describe --force`
+    behind a CDN change drops the previous description; a
+    `media --force` re-download collapses described back to downloaded).
     """
 
     a: MediaStateCounts = Field(default_factory=MediaStateCounts)
     b: MediaStateCounts = Field(default_factory=MediaStateCounts)
     delta_downloaded: int = 0
+    delta_described: int = 0
     delta_pending: int = 0
     delta_failed: int = 0
     delta_video_pending: int = 0
@@ -176,6 +183,7 @@ class DiffSummary(BaseModel):
     topic_pages_a: int
     topic_pages_b: int
     media_delta_downloaded: int = 0
+    media_delta_described: int = 0
     media_delta_failed: int = 0
 
 
@@ -271,6 +279,7 @@ def _load_or_explain(path: Path, loader):
         topic_pages_a=len(pages_a),
         topic_pages_b=len(pages_b),
         media_delta_downloaded=media_diff.delta_downloaded,
+        media_delta_described=media_diff.delta_described,
         media_delta_failed=media_diff.delta_failed,
     )
     return DiffReport(
@@ -400,6 +409,7 @@ def _compute_media_diff(
         a=counts_a,
         b=counts_b,
         delta_downloaded=counts_b.downloaded - counts_a.downloaded,
+        delta_described=counts_b.described - counts_a.described,
         delta_pending=counts_b.pending - counts_a.pending,
         delta_failed=counts_b.failed - counts_a.failed,
         delta_video_pending=counts_b.video_pending - counts_a.video_pending,
@@ -407,12 +417,14 @@ def _compute_media_diff(
 
 
 def _count_media_variants(items: dict[str, Item]) -> MediaStateCounts:
-    """Tally the four media variants across every item in the store."""
+    """Tally the five media variants across every item in the store."""
     counts = MediaStateCounts()
     for item in items.values():
         for entry in item.media:
             if isinstance(entry, MediaPhotoDownloaded):
                 counts.downloaded += 1
+            elif isinstance(entry, MediaPhotoDescribed):
+                counts.described += 1
             elif isinstance(entry, MediaPhotoPending):
                 counts.pending += 1
             elif isinstance(entry, MediaPhotoFailed):
@@ -528,6 +540,8 @@ def _format_media_block(media: MediaDiff) -> list[str]:
     return [
         f"  downloaded:    A={media.a.downloaded:>5}  B={media.b.downloaded:>5}  "
         f"({_format_delta(media.delta_downloaded)})",
+        f"  described:     A={media.a.described:>5}  B={media.b.described:>5}  "
+        f"({_format_delta(media.delta_described)})",
         f"  pending:       A={media.a.pending:>5}  B={media.b.pending:>5}  "
         f"({_format_delta(media.delta_pending)})",
         f"  failed:        A={media.a.failed:>5}  B={media.b.failed:>5}  "
diff --git a/src/xbrain/generate.py b/src/xbrain/generate.py
index 92bf149..83d6574 100644
--- a/src/xbrain/generate.py
+++ b/src/xbrain/generate.py
@@ -16,6 +16,7 @@
     ContentSourceSuccess,
     FailureReason,
     Item,
+    MediaPhotoDescribed,
     MediaPhotoDownloaded,
     MediaPhotoFailed,
     MediaPhotoPending,
@@ -196,10 +197,16 @@ def _render_media_lines(item: Item) -> list[str]:
     """One line per `Item.media` entry, ready to splice into the Tweet section.
 
     Variant handling:
-    - `MediaPhotoDownloaded` → Obsidian embed `![[_media/<id>/<n>.<ext>]]`.
-      The vault is self-contained: `generate()` mirrors the file from
-      `data/media/` into `<output_dir>/_media/` before rendering, so the
-      embed resolves with no user configuration.
+    - `MediaPhotoDownloaded` / `MediaPhotoDescribed` → Obsidian embed
+      `![[_media/<id>/<n>.<ext>]]`. The vault is self-contained:
+      `generate()` mirrors the file from `data/media/` into
+      `<output_dir>/_media/` before rendering, so the embed resolves
+      with no user configuration. The described variant inherits the
+      same on-disk file — the description is consumed by the LLM
+      prompts in `executors/api.py` / `topic_synth.py`, NOT shown as
+      alt-text in this phase. Decorative photos are still embedded;
+      the `is_decorative` flag only filters them out of the LLM prompts,
+      never out of the visual rendering.
     - `MediaPhotoFailed`      → one-line ⚠ warning carrying the failure
       reason and the original URL — visible evidence, not a silent drop.
     - `MediaPhotoPending`     → silent. Not an error, just "the next
@@ -213,7 +220,7 @@ def _render_media_lines(item: Item) -> list[str]:
     """
     lines: list[str] = []
     for entry in item.media:
-        if isinstance(entry, MediaPhotoDownloaded):
+        if isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed)):
             lines.append(f"![[{_VAULT_MEDIA_SUBDIR}/{entry.local_path}]]")
         elif isinstance(entry, MediaPhotoFailed):
             reason = _FAILURE_ES_MEDIA.get(entry.failure_reason, entry.failure_reason)
@@ -259,7 +266,9 @@ def _mirror_item_media(item: Item, media_root: Path, vault_media_dir: Path) -> N
     """
     vault_media_dir.mkdir(parents=True, exist_ok=True)
     for entry in item.media:
-        if not isinstance(entry, MediaPhotoDownloaded):
+        # The described variant inherits the on-disk bytes from the
+        # prior downloaded state — both shapes hit the same mirror path.
+        if not isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed)):
             continue
         source = media_root / entry.local_path
         destination = vault_media_dir / entry.local_path
diff --git a/src/xbrain/media.py b/src/xbrain/media.py
index 67aaa57..c52322e 100644
--- a/src/xbrain/media.py
+++ b/src/xbrain/media.py
@@ -35,6 +35,7 @@
     Item,
     MediaEntry,
     MediaFailureReason,
+    MediaPhotoDescribed,
     MediaPhotoDownloaded,
     MediaPhotoFailed,
     MediaPhotoPending,
@@ -210,12 +211,21 @@ def download_all(
 
 
 def _is_eligible(entry: MediaEntry, *, force: bool) -> bool:
-    """Decide whether `download_all` should attempt this entry on THIS run."""
+    """Decide whether `download_all` should attempt this entry on THIS run.
+
+    The described variant inherits the downloaded contract: a re-download
+    only happens under `--force`. A `--force` re-download drops the
+    description (the entry collapses back to `MediaPhotoDownloaded`) —
+    `xbrain describe` is the path that re-adds it. Forcing the bytes
+    without re-describing is the rare case (e.g. the X CDN replaced the
+    asset); the operator is expected to follow with `xbrain describe
+    --force` if the new bytes warrant it.
+    """
     if isinstance(entry, MediaVideoPending):
         return False
     if isinstance(entry, MediaPhotoPending):
         return True
-    if isinstance(entry, MediaPhotoDownloaded):
+    if isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed)):
         return force
     if isinstance(entry, MediaPhotoFailed):
         if force:
@@ -243,15 +253,24 @@ def _iter_eligible_attempts(
     limit: int | None,
     force: bool,
     report: MediaReport,
-) -> Iterator[tuple[str, Item, int, MediaPhotoPending | MediaPhotoFailed | MediaPhotoDownloaded]]:
+) -> Iterator[
+    tuple[
+        str,
+        Item,
+        int,
+        MediaPhotoPending | MediaPhotoFailed | MediaPhotoDownloaded | MediaPhotoDescribed,
+    ]
+]:
     """Yield each (item_id, item, index, entry) pair eligible for download.
 
     Encapsulates the empty-media skip + per-entry eligibility cascade +
     global limit countdown that `download_all` would otherwise interleave
     with the download orchestration. Side effects on `report`: bumps
     `items_processed` once per item that has media, and
-    `photos_skipped_already_downloaded` once per Downloaded entry passed
-    over (without `--force`). Stops yielding once `limit` is exhausted.
+    `photos_skipped_already_downloaded` once per Downloaded / Described
+    entry passed over (without `--force`) — both share the
+    "bytes-already-on-disk" semantics from the downloader's perspective.
+    Stops yielding once `limit` is exhausted.
     """
     remaining = limit
     for item_id, item in items.items():
@@ -262,11 +281,19 @@ def _iter_eligible_attempts(
             if remaining is not None and remaining <= 0:
                 return
             if not _is_eligible(entry, force=force):
-                if isinstance(entry, MediaPhotoDownloaded):
+                if isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed)):
                     report.photos_skipped_already_downloaded += 1
                 continue
             # `_is_eligible` already excluded `MediaVideoPending`; narrow for mypy.
-            assert isinstance(entry, (MediaPhotoPending, MediaPhotoFailed, MediaPhotoDownloaded))
+            assert isinstance(
+                entry,
+                (
+                    MediaPhotoPending,
+                    MediaPhotoFailed,
+                    MediaPhotoDownloaded,
+                    MediaPhotoDescribed,
+                ),
+            )
             if remaining is not None:
                 remaining -= 1
             yield item_id, item, index, entry
@@ -305,7 +332,7 @@ def _record_outcome(
 
 
 def _download_one(
-    entry: MediaPhotoPending | MediaPhotoFailed | MediaPhotoDownloaded,
+    entry: MediaPhotoPending | MediaPhotoFailed | MediaPhotoDownloaded | MediaPhotoDescribed,
     *,
     item_id: str,
     index: int,
diff --git a/src/xbrain/models.py b/src/xbrain/models.py
index 13296ce..b018b1e 100644
--- a/src/xbrain/models.py
+++ b/src/xbrain/models.py
@@ -156,6 +156,62 @@ class MediaPhotoFailed(_MediaPhotoBase):
     last_attempt_at: datetime
 
 
+class MediaPhotoDescribed(_MediaPhotoBase):
+    """A downloaded photo that has also been described by a vision LLM.
+
+    The terminal state for a content-bearing photo: the bytes still live
+    at `local_path` (every `MediaPhotoDownloaded` invariant carries over)
+    AND a vision pass has classified the image and written a short prose
+    description. Decorative photos (avatars, reaction memes, abstract
+    backgrounds) reach this variant too — they carry `is_decorative=True`
+    and an empty `description`, so downstream callers can filter them out
+    without re-classifying.
+
+    `description_version` is the rubric/version tag the description was
+    produced under. Bumping the configured version invalidates existing
+    entries: the next `xbrain describe` run treats them as stale and
+    re-describes (no `--force` needed). `description_lang` records the
+    language the prose was written in so a vault that switches
+    `output_language` mid-corpus can be re-described selectively.
+
+    `description` is empty for decorative photos by contract — the
+    vision rubric returns an empty string in that case and a refusal
+    (faces, NSFW) is also bucketed as decorative with empty description.
+    No `gt=0` constraint here for that reason.
+    """
+
+    kind: Literal["photo_described"] = "photo_described"
+    local_path: str
+    width: int = Field(gt=0)
+    height: int = Field(gt=0)
+    bytes_size: int = Field(gt=0)
+    downloaded_at: datetime
+    is_decorative: bool
+    description: str
+    description_lang: str
+    description_version: str
+    described_at: datetime
+
+    @field_validator("local_path")
+    @classmethod
+    def _reject_path_traversal(cls, value: str) -> str:
+        """Reject absolute paths and `..` components.
+
+        Mirrors `MediaPhotoDownloaded._reject_path_traversal` — the
+        described variant inherits the same on-disk path contract because
+        the bytes from the prior Downloaded state are still referenced
+        verbatim. Persisted `items.json` is on-disk plain JSON the user
+        can edit, so the defence in depth is reapplied here.
+        """
+        _ = cls
+        if value.startswith("/") or value.startswith("\\"):
+            raise ValueError(f"local_path must be relative, got {value!r}")
+        for part in value.replace("\\", "/").split("/"):
+            if part == "..":
+                raise ValueError(f"local_path must not contain '..' components: {value!r}")
+        return value
+
+
 class MediaVideoPending(BaseModel):
     """A video URL captured but not downloaded.
 
@@ -181,9 +237,12 @@ def _normalise_legacy_media(value: Any) -> Any:
 
     Records that already carry a ``kind`` field are passed through
     unchanged — they are either fresh (extract-time) or already in the
-    tagged-union shape. A record with neither `kind` nor a recognised
-    `type` is passed through unchanged so pydantic raises a clean
-    discriminator error rather than this validator inventing a state.
+    tagged-union shape (`photo_pending` / `photo_downloaded` /
+    `photo_failed` / `photo_described` / `video_pending`). A record with
+    neither `kind` nor a recognised `type` is passed through unchanged
+    so pydantic raises a clean discriminator error rather than this
+    validator inventing a state. The described variant has no legacy
+    shape — it can only originate from a vision-describe run.
     """
     if not isinstance(value, dict):
         return value
@@ -203,7 +262,13 @@ def _normalise_legacy_media(value: Any) -> Any:
 # (see the long comment above): the discriminator check must run AFTER
 # the legacy normaliser, otherwise legacy records get rejected.
 _MediaTagged = Annotated[
-    Union[MediaPhotoPending, MediaPhotoDownloaded, MediaPhotoFailed, MediaVideoPending],
+    Union[
+        MediaPhotoPending,
+        MediaPhotoDownloaded,
+        MediaPhotoFailed,
+        MediaPhotoDescribed,
+        MediaVideoPending,
+    ],
     Field(discriminator="kind"),
 ]
 MediaEntry = Annotated[
@@ -215,7 +280,13 @@ def _normalise_legacy_media(value: Any) -> Any:
 # TypeAdapter for tests / ad-hoc validation of a single entry outside an
 # `Item` context (mirrors `ContentSourceAdapter`).
 MediaEntryAdapter: TypeAdapter[
-    Union[MediaPhotoPending, MediaPhotoDownloaded, MediaPhotoFailed, MediaVideoPending]
+    Union[
+        MediaPhotoPending,
+        MediaPhotoDownloaded,
+        MediaPhotoFailed,
+        MediaPhotoDescribed,
+        MediaVideoPending,
+    ]
 ] = TypeAdapter(MediaEntry)
 
 
diff --git a/tests/test_models.py b/tests/test_models.py
index 7b0bc84..8df67c9 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -292,6 +292,122 @@ def test_media_new_tagged_shape_passes_through():
     assert entry.width == 1200
 
 
+def test_media_photo_described_round_trips_through_json():
+    """A `MediaPhotoDescribed` payload reads back as the same variant.
+
+    Exercises the discriminator path: the new `photo_described` kind
+    must match exactly one variant. A round-trip dump must NOT collapse
+    back to `MediaPhotoDownloaded` (which carries the same on-disk
+    fields minus the description payload).
+    """
+    from datetime import datetime, timezone
+
+    from xbrain.models import MediaEntryAdapter, MediaPhotoDescribed
+
+    payload = {
+        "kind": "photo_described",
+        "type": "photo",
+        "url": "https://pbs.twimg.com/media/X.jpg",
+        "local_path": "123/0.jpg",
+        "width": 1200,
+        "height": 800,
+        "bytes_size": 99000,
+        "downloaded_at": datetime(2026, 5, 24, tzinfo=timezone.utc).isoformat(),
+        "is_decorative": False,
+        "description": "A chart showing model accuracy by parameter count.",
+        "description_lang": "English",
+        "description_version": "v1",
+        "described_at": datetime(2026, 5, 24, 12, tzinfo=timezone.utc).isoformat(),
+    }
+    entry = MediaEntryAdapter.validate_python(payload)
+    assert isinstance(entry, MediaPhotoDescribed)
+    assert entry.is_decorative is False
+    assert entry.description.startswith("A chart")
+    assert entry.description_lang == "English"
+    assert entry.description_version == "v1"
+    # Re-dump and re-parse: variant must survive verbatim.
+    restored = MediaEntryAdapter.validate_python(entry.model_dump(mode="json"))
+    assert isinstance(restored, MediaPhotoDescribed)
+    assert restored == entry
+
+
+def test_media_photo_described_decorative_carries_empty_description():
+    """Decorative entries store an empty description by contract.
+
+    The vision rubric returns empty for decorative; refusals (faces, NSFW)
+    are bucketed as decorative + empty. The model accepts this explicitly
+    — there is no `gt=0` length constraint on `description`.
+    """
+    from datetime import datetime, timezone
+
+    from xbrain.models import MediaPhotoDescribed
+
+    entry = MediaPhotoDescribed(
+        url="https://pbs.twimg.com/media/X.jpg",
+        local_path="123/0.jpg",
+        width=400,
+        height=400,
+        bytes_size=12000,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        is_decorative=True,
+        description="",
+        description_lang="English",
+        description_version="v1",
+        described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
+    )
+    assert entry.is_decorative is True
+    assert entry.description == ""
+
+
+def test_media_photo_described_rejects_absolute_local_path():
+    """Path-traversal defence carries over from the downloaded variant —
+    the bytes referenced by `local_path` are inherited from the prior
+    state, so the same hardening applies.
+    """
+    import pytest
+    from pydantic import ValidationError
+
+    from xbrain.models import MediaPhotoDescribed
+
+    with pytest.raises(ValidationError):
+        MediaPhotoDescribed(
+            url="https://pbs.twimg.com/media/X.jpg",
+            local_path="/etc/passwd",
+            width=1,
+            height=1,
+            bytes_size=1,
+            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            is_decorative=False,
+            description="x",
+            description_lang="English",
+            description_version="v1",
+            described_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        )
+
+
+def test_media_photo_described_rejects_parent_traversal_local_path():
+    """Same defence as the downloaded variant — `..` in `local_path` is rejected."""
+    import pytest
+    from pydantic import ValidationError
+
+    from xbrain.models import MediaPhotoDescribed
+
+    with pytest.raises(ValidationError):
+        MediaPhotoDescribed(
+            url="https://pbs.twimg.com/media/X.jpg",
+            local_path="../escape/x.jpg",
+            width=1,
+            height=1,
+            bytes_size=1,
+            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            is_decorative=False,
+            description="x",
+            description_lang="English",
+            description_version="v1",
+            described_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        )
+
+
 def test_media_discriminator_rejects_unknown_kind():
     """Silently inventing a variant would mask data corruption — reject loudly."""
     import pytest

From deb094dbb6ed8e36835743fc838b59be7c48bb1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:18:18 +0200
Subject: [PATCH 02/11] [#34] add describe-image rubric with {language}
 placeholder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The vision-describe call ships a declarative system prompt — same
shape as the other rubrics. It enumerates the decorative-vs-content
classification, the per-image description contract (1-3 sentences,
plain prose, faithful), the refusal fallback (decorative + empty
description), and the exact JSON output shape (list of {index,
is_decorative, description}).

The `{language}` placeholder follows the existing rubric convention
so `output_language` propagates into vision calls the same way it
already propagates into summary/topic-page rubrics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/xbrain/rubrics/rubric-describe-image.md | 72 +++++++++++++++++++++
 tests/test_rubrics.py                       | 21 ++++++
 2 files changed, 93 insertions(+)
 create mode 100644 src/xbrain/rubrics/rubric-describe-image.md

diff --git a/src/xbrain/rubrics/rubric-describe-image.md b/src/xbrain/rubrics/rubric-describe-image.md
new file mode 100644
index 0000000..4ebf214
--- /dev/null
+++ b/src/xbrain/rubrics/rubric-describe-image.md
@@ -0,0 +1,72 @@
+# Rubric — Describe images
+
+You describe images for a personal knowledge wiki. The descriptions are
+read by a downstream LLM that assigns topics and writes topic-page
+overviews — they are NOT shown to the user. Write for that LLM: be
+factual, dense, and short.
+
+- **Language:** {language}, regardless of any text visible in the image.
+- **Length per description:** 1 to 3 sentences. No preamble ("This image
+  shows..."). No markdown, no wikilinks, no bullet characters, no quotes.
+- **Faithful:** describe only what is visible. Never invent text, numbers
+  or names. If a chart's labels are unreadable, say so plainly rather
+  than guessing.
+
+## Classify each image
+
+For every image you receive, decide one of two buckets:
+
+- **`is_decorative: true`** when the image carries no topical content.
+  Avatars, profile pictures, plain reaction GIFs / memes, abstract
+  backgrounds, pure aesthetic stills, decorative banners, brand logos
+  used as ornaments. Decorative images contribute no topic signal —
+  the downstream LLM will skip them.
+- **`is_decorative: false`** when the image conveys information. This
+  is the common case: screenshots of text / code / charts / diagrams /
+  papers / dashboards / UIs, photos of whiteboards, slides, product
+  shots with visible labels, data visualisations, infographics, real
+  scenes whose content is the point (e.g. a queue at a launch event,
+  a protest sign, a hardware close-up).
+
+When in doubt, prefer **`is_decorative: false`**: a description is
+cheap, missing topic signal is not.
+
+## Write each description
+
+- For a chart: name the chart type, the axes (if labelled), the
+  comparison being made, and any headline number visible. Two sentences
+  is usually enough.
+- For a screenshot of text: paraphrase the substance in your own words.
+  Quote a short distinctive phrase only if the verbatim wording matters
+  (a product name, a thesis statement).
+- For a diagram: name the components and the relationships between them
+  in one sentence; the second sentence may add what the diagram is
+  arguing.
+- For a photo: state what is depicted and any visible text or signage.
+- **For a decorative image:** set `description` to the empty string
+  `""`. Do not write "decorative image" or any placeholder — the empty
+  string is the contract.
+
+## Refusals
+
+If you cannot describe an image (a recognisable face, NSFW, or any
+content you must decline), do not raise an error: emit
+`is_decorative: true` with `description: ""`. The downstream LLM will
+treat the entry as a decorative no-signal photo. No special-case
+handling is needed.
+
+## Output format
+
+Respond with a single JSON list, one entry per image in the order you
+received them. Use `index` to disambiguate; the caller maps it back to
+the input position.
+
+```json
+[
+  {"index": 0, "is_decorative": false, "description": "Line chart comparing GPT-4 and Claude on MMLU; Claude is 2 points higher."},
+  {"index": 1, "is_decorative": true, "description": ""}
+]
+```
+
+- Exactly one entry per input image.
+- No extra keys, no preamble, no surrounding prose.
diff --git a/tests/test_rubrics.py b/tests/test_rubrics.py
index 2095d1a..688a706 100644
--- a/tests/test_rubrics.py
+++ b/tests/test_rubrics.py
@@ -86,3 +86,24 @@ def test_topic_page_rubric_loads():
     text = load_rubric("topic-page")
     assert "overview" in text
     assert "notes" in text
+
+
+def test_describe_image_rubric_loads_and_substitutes_language():
+    """The describe-image rubric ships a `{language}` placeholder; the
+    loader must substitute it, and the defensive check must not trip on
+    correctly-spelt placeholders.
+    """
+    text = load_rubric("describe-image", language="English")
+    assert "{language}" not in text
+    assert "English" in text
+    # Sanity: the contract keys must appear in the prompt so the LLM
+    # produces the right JSON shape.
+    assert "is_decorative" in text
+    assert "description" in text
+    assert "index" in text
+
+
+def test_describe_image_rubric_preserves_placeholder_when_language_none():
+    """No-language calls (tests, manual inspection) keep the literal placeholder."""
+    text = load_rubric("describe-image")
+    assert "{language}" in text

From b59d6106dce73a226130d3605c41303144649da4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:28:12 +0200
Subject: [PATCH 03/11] [#34] add xbrain describe orchestrator + Anthropic
 client wiring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The orchestrator walks every eligible photo, batches the bytes into
vision-API calls (default 5 images per call), parses the per-image
JSON list judgments, and transitions matched entries to
MediaPhotoDescribed. Decorative classifications are persisted with
an empty description so downstream consumers (enrich-time prompt,
topic-synth prompt) can filter them out at a single seam.

The shape mirrors xbrain.executors.api and xbrain.topic_synth:
narrow recoverable-errors tuple, per-batch failure isolation,
logger.warning on every failure, RuntimeError on total failure, and
a SUMMARY: described/failed/skipped stderr line. Programmer bugs
(AttributeError) and KeyboardInterrupt propagate as their narrow
catch is on Exception subclasses only.

Eligibility uses a description-version tag: bumping the configured
version invalidates persisted descriptions automatically (no --force
needed). The Ctrl-C-coherent invariant is delegated to an
on_progress callback the CLI wires to a store write. 41 unit tests
exercise idempotency, force/stale-version, batching, partial+total
failure, refusal handling, language plumbing, programmer-bug
propagation, KeyboardInterrupt, file-missing fallback, summary
line, and clock injection — all via FakeAnthropic.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/xbrain/describe.py | 772 +++++++++++++++++++++++++++++++++
 tests/test_describe.py | 953 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1725 insertions(+)
 create mode 100644 src/xbrain/describe.py
 create mode 100644 tests/test_describe.py

diff --git a/src/xbrain/describe.py b/src/xbrain/describe.py
new file mode 100644
index 0000000..61f514e
--- /dev/null
+++ b/src/xbrain/describe.py
@@ -0,0 +1,772 @@
+"""Describe downloaded photos with a vision LLM; feed descriptions into enrich.
+
+The `describe_all` orchestrator walks every photo entry the downloader
+has produced, batches the bytes into vision-API calls (default: 5 images
+per call to Claude Sonnet), parses the per-image JSON judgments, and
+transitions matched entries to `MediaPhotoDescribed`. The persisted
+description is consumed by the enrich-time prompt in `executors.api`
+and the topic-synth prompt in `topic_synth` — decorative photos are
+filtered out at that consumption seam so they introduce no topic noise.
+
+The structure mirrors `xbrain.executors.api` and `xbrain.topic_synth`:
+a recoverable-errors tuple, per-batch failure isolation,
+`logger.warning` on every failure, `RuntimeError` on total failure, and
+a `SUMMARY: described: N, failed: M, skipped: K` stderr line for the
+CLI. Programmer bugs (`AttributeError`) and `KeyboardInterrupt`
+propagate — narrow `except` clauses only.
+
+I/O dependencies (the Anthropic client, the media root path) are
+keyword-injectable so tests run offline. The store mutation is in
+place; the caller is expected to wrap each transition with a
+store-write (the `on_progress` callback fires after every batch).
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import logging
+import sys
+import time
+from collections.abc import Callable, Iterator
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+
+from xbrain.models import (
+    Item,
+    MediaPhotoDescribed,
+    MediaPhotoDownloaded,
+)
+from xbrain.rubrics import load_rubric
+
+logger = logging.getLogger(__name__)
+
+# Default per-call image count. The vision rubric is tuned for batches in
+# the 1-10 range; 5 is the sweet spot the spec settled on (12-15 % token
+# saving vs per-image, modest added complexity).
+_DEFAULT_BATCH_SIZE = 5
+
+# Token ceiling for the JSON list response. Per-image average is ~3
+# sentences ≈ 80 tokens of prose + 20 of JSON scaffolding = ~100 tokens.
+# A batch of 5 fits comfortably under 600; the cap is set high enough to
+# survive an over-eager model that emits long descriptions.
+_MAX_TOKENS = 1200
+
+# Map file extensions to the Anthropic vision media-type strings. The
+# downloader writes one of these three (see `xbrain.media._FORMAT_EXTENSIONS`).
+_MEDIA_TYPES: dict[str, str] = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".webp": "image/webp",
+}
+
+
+def _utcnow() -> datetime:
+    """Default `now` clock — UTC-aware `datetime.now()`.
+
+    Module-level (not a lambda inside `describe_all`) so tests can
+    monkeypatch a deterministic clock without going through the `now=`
+    keyword argument on every call site.
+    """
+    return datetime.now(timezone.utc)
+
+
+@dataclass
+class DescribeReport:
+    """Counts emitted by `describe_all` for the CLI's SUMMARY line.
+
+    `photos_skipped_already_described` is the idempotency proof — a
+    no-op re-run with no version bump must report every previously
+    described photo here. `batches_attempted` counts API calls actually
+    issued; `batches_failed` counts the ones the recoverable-errors
+    tuple swallowed. A run with `photos_attempted > 0 and
+    photos_described == 0` raises before this report leaves the
+    orchestrator.
+    """
+
+    items_processed: int = 0
+    photos_attempted: int = 0
+    photos_described: int = 0
+    photos_failed: int = 0
+    photos_skipped_already_described: int = 0
+    batches_attempted: int = 0
+    batches_failed: int = 0
+    elapsed_seconds: float = 0.0
+    # Per-item failures keyed by item id → list of (url, error) tuples.
+    # Surfaces in the verbose CLI output without re-walking the store.
+    per_item_failures: dict[str, list[tuple[str, str]]] = field(default_factory=dict)
+
+
+@dataclass(frozen=True)
+class _Candidate:
+    """One photo eligible for description on this run.
+
+    Holds back-references to `Item` and the media-list `index` so the
+    orchestrator can swap the transitioned variant back into place
+    without re-scanning the store. `bytes_data` is loaded lazily by
+    `_load_bytes` — failing to read the file is a per-photo failure,
+    not a total-run abort.
+    """
+
+    item_id: str
+    item: Item
+    index: int
+    entry: MediaPhotoDownloaded | MediaPhotoDescribed
+
+
+def _recoverable_errors() -> tuple[type[Exception], ...]:
+    """Exception classes a per-batch failure should swallow + log + continue on.
+
+    Mirrors `xbrain.executors.api._recoverable_errors`. `anthropic.APIError`
+    covers auth, rate-limit, server-side and network errors the SDK
+    normalises. `ValueError` covers validator rejections (and
+    `pydantic.ValidationError`, a `ValueError` subclass in pydantic v2).
+    `json.JSONDecodeError` covers malformed LLM responses. `KeyError`
+    covers responses missing expected fields. `OSError` covers
+    file-read failures when streaming photo bytes off disk (a missing
+    file under `data/media/` is per-photo recoverable, not a total
+    abort).
+
+    Lazy-imported because `anthropic` is optional in the test
+    environment (the client is faked via `tests.conftest.FakeAnthropic`).
+    """
+    try:
+        from anthropic import APIError
+
+        return (APIError, ValueError, json.JSONDecodeError, KeyError, OSError)
+    except ImportError:
+        return (ValueError, json.JSONDecodeError, KeyError, OSError)
+
+
+def _is_stale(entry: MediaPhotoDescribed, *, current_version: str) -> bool:
+    """A described entry is stale when its version no longer matches the config.
+
+    Bumping `describe_version` in `config.toml` is the manual trigger to
+    re-describe the whole corpus against a new rubric — no `--force`
+    needed. The version check is exact-string: there is no ordering
+    relation between versions, only equality, so a deliberate downgrade
+    is also a "describe again" signal.
+    """
+    return entry.description_version != current_version
+
+
+def _eligible(
+    entry: object,
+    *,
+    force: bool,
+    current_version: str,
+) -> bool:
+    """Decide whether `describe_all` should attempt this entry on THIS run.
+
+    `MediaPhotoDownloaded` entries are always eligible — they have not
+    been described yet by definition. `MediaPhotoDescribed` entries are
+    eligible only when `--force` is set or the persisted version is
+    stale vs the current `describe_version` config. Every other
+    variant (`MediaPhotoPending`, `MediaPhotoFailed`, `MediaVideoPending`)
+    is out of scope — describing only runs over photos whose bytes are
+    already on disk.
+    """
+    if isinstance(entry, MediaPhotoDownloaded):
+        return True
+    if isinstance(entry, MediaPhotoDescribed):
+        if force:
+            return True
+        return _is_stale(entry, current_version=current_version)
+    return False
+
+
+def _tally_skipped(
+    entry: object,
+    *,
+    current_version: str,
+    report: DescribeReport,
+) -> None:
+    """Bump `photos_skipped_already_described` when this entry is a no-op skip.
+
+    Pulled out of the candidate iterator so the loop body in
+    `_iter_candidates` keeps a low complexity grade. Only described
+    entries on the current version count as skips — pending/failed/
+    video entries are silently out of scope for `xbrain describe`.
+    """
+    if isinstance(entry, MediaPhotoDescribed) and not _is_stale(
+        entry, current_version=current_version
+    ):
+        report.photos_skipped_already_described += 1
+
+
+def _iter_item_candidates(
+    item_id: str,
+    item: Item,
+    *,
+    force: bool,
+    current_version: str,
+    report: DescribeReport,
+) -> Iterator[_Candidate]:
+    """Yield every eligible candidate inside one item's media list.
+
+    Pulled out of the cross-item iterator so the outer scan stays
+    flat. Per-entry tallies (skips) go on the report; the caller
+    decides whether the item itself counts as processed by checking
+    if this iterator yielded anything.
+    """
+    for index, entry in enumerate(item.media):
+        if not _eligible(entry, force=force, current_version=current_version):
+            _tally_skipped(entry, current_version=current_version, report=report)
+            continue
+        assert isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed))
+        yield _Candidate(item_id=item_id, item=item, index=index, entry=entry)
+
+
+def _take_with_limit(candidates: Iterator[_Candidate], limit: int | None) -> Iterator[_Candidate]:
+    """Yield from `candidates`, stopping after `limit` items.
+
+    Pulled out so `_iter_candidates` does not interleave the
+    limit-countdown with the per-item bookkeeping. `None` means "no
+    limit" — yields everything.
+    """
+    if limit is None:
+        yield from candidates
+        return
+    remaining = limit
+    for candidate in candidates:
+        if remaining <= 0:
+            return
+        remaining -= 1
+        yield candidate
+
+
+def _iter_candidates(
+    items: dict[str, Item],
+    *,
+    force: bool,
+    limit: int | None,
+    items_filter: set[str] | None,
+    current_version: str,
+    report: DescribeReport,
+) -> Iterator[_Candidate]:
+    """Yield each candidate eligible for description on THIS run.
+
+    Side effects on `report`: bumps `items_processed` once per item
+    that contributes at least one yielded candidate, and bumps
+    `photos_skipped_already_described` for each `MediaPhotoDescribed`
+    entry passed over (via `_tally_skipped`). Stops yielding once
+    `limit` is exhausted.
+
+    `items_processed` is bumped on the FIRST yielded candidate of an
+    item, not at the end of its scan — that way a `limit` that
+    truncates mid-item still counts the item as processed (work
+    happened on it). Items whose every photo was skipped do NOT count
+    as processed.
+    """
+    seen_item_ids: set[str] = set()
+
+    def _all_eligible() -> Iterator[_Candidate]:
+        for item_id, item in items.items():
+            if items_filter is not None and item_id not in items_filter:
+                continue
+            if not item.media:
+                continue
+            yield from _iter_item_candidates(
+                item_id,
+                item,
+                force=force,
+                current_version=current_version,
+                report=report,
+            )
+
+    for candidate in _take_with_limit(_all_eligible(), limit):
+        if candidate.item_id not in seen_item_ids:
+            seen_item_ids.add(candidate.item_id)
+            report.items_processed += 1
+        yield candidate
+
+
+def _chunked(candidates: list[_Candidate], size: int) -> Iterator[list[_Candidate]]:
+    """Split a candidate list into batches of at most `size` entries.
+
+    `size` is guaranteed positive by the CLI layer (`Typer` validates
+    integer ranges); this helper does not re-validate. An empty input
+    yields nothing — the orchestrator never issues a no-op API call.
+    """
+    for start in range(0, len(candidates), size):
+        yield candidates[start : start + size]
+
+
+def _media_type(local_path: str) -> str:
+    """Map an on-disk path's extension to its Anthropic media-type string.
+
+    The downloader writes one of `.jpg` / `.png` / `.webp` (see
+    `xbrain.media._FORMAT_EXTENSIONS`). Anything else came from a hand
+    edit of `items.json` or a future format we have not registered —
+    fall back to `image/jpeg` and let Anthropic reject it if the bytes
+    do not match. The fall-back keeps a per-photo failure out of the
+    total-failure raise path.
+    """
+    suffix = Path(local_path).suffix.lower()
+    return _MEDIA_TYPES.get(suffix, "image/jpeg")
+
+
+def _load_bytes(media_root: Path, local_path: str) -> bytes:
+    """Read the photo bytes from `data/media/<local_path>`.
+
+    Raises `OSError` (a `FileNotFoundError` subclass when the file is
+    missing). The orchestrator's `_recoverable_errors` tuple catches it
+    so a missing file is a per-photo failure (the operator can re-run
+    `xbrain media` to repopulate), never a whole-batch abort.
+    """
+    return (media_root / local_path).read_bytes()
+
+
+def _build_image_block(data: bytes, media_type: str) -> dict:
+    """Build one Anthropic vision content block from raw photo bytes.
+
+    The wire shape is `{type: image, source: {type: base64, media_type, data}}`.
+    Tests bypass this by using a `FakeAnthropic` that does not inspect
+    `messages`; production uses the real SDK which validates the shape.
+    """
+    return {
+        "type": "image",
+        "source": {
+            "type": "base64",
+            "media_type": media_type,
+            "data": base64.b64encode(data).decode("ascii"),
+        },
+    }
+
+
+def _system_prompt(language: str) -> str:
+    """Build the system prompt — the declarative rubric with `{language}` substituted."""
+    return load_rubric("describe-image", language=language)
+
+
+def _user_directive(batch_size: int) -> str:
+    """Plain-text directive that follows the image blocks in the user turn.
+
+    Tells the model the index range so the JSON list it emits is
+    contractually one-to-one with the input order. Indices are
+    zero-based to match Python and the rubric's example.
+    """
+    last = batch_size - 1
+    return (
+        f"Describe images 0 through {last}. "
+        "Return a JSON list with one entry per image, in the order received."
+    )
+
+
+def _extract_response_text(response: object) -> str:
+    """Pull the JSON-bearing text out of a vision response, stripping fences.
+
+    The Anthropic SDK packs the model's reply in `.content` as a list
+    of typed blocks; only `text` blocks carry JSON. Some models wrap
+    the JSON in a ```json ... ``` Markdown fence despite the rubric
+    explicitly forbidding it — strip a single leading/trailing fence
+    pair so the downstream `json.loads` does not trip on a Markdown
+    artefact.
+    """
+    blocks = [b for b in getattr(response, "content", []) if getattr(b, "type", None) == "text"]
+    if not blocks:
+        raise ValueError("vision response has no text block")
+    text = "".join(b.text for b in blocks).strip()
+    if text.startswith("```"):
+        first_newline = text.find("\n")
+        if first_newline != -1:
+            text = text[first_newline + 1 :]
+        if text.endswith("```"):
+            text = text[: -len("```")]
+        text = text.strip()
+    return text
+
+
+def _validate_judgment_entry(entry: object, *, batch_size: int) -> dict:
+    """Validate one per-image judgment dict and return it on success.
+
+    Pulled out of `_parse_batch_response` so the loop body in the
+    parser stays a one-liner — keeps the complexity grade off C.
+    Enforces the wire contract: required keys + int (not bool) index
+    in `[0, batch_size)` + bool decorative flag + str description.
+    """
+    if not isinstance(entry, dict):
+        raise ValueError(f"vision response entry is not a JSON object: {entry!r}")
+    if not {"index", "is_decorative", "description"} <= entry.keys():
+        raise ValueError(
+            f"vision response entry missing required keys "
+            f"(got {sorted(entry)!r}, need index/is_decorative/description)"
+        )
+    index = entry["index"]
+    # `bool` is a subclass of `int` in Python — exclude it explicitly so a
+    # `True`/`False` index does not silently become 1/0.
+    if not isinstance(index, int) or isinstance(index, bool):
+        raise ValueError(f"vision response `index` must be int, got {index!r}")
+    if not (0 <= index < batch_size):
+        raise ValueError(f"vision response `index` {index} out of batch range [0, {batch_size})")
+    if not isinstance(entry["is_decorative"], bool):
+        raise ValueError(
+            f"vision response `is_decorative` must be bool, got {entry['is_decorative']!r}"
+        )
+    if not isinstance(entry["description"], str):
+        raise ValueError(f"vision response `description` must be str, got {entry['description']!r}")
+    return entry
+
+
+def _parse_batch_response(response: object, batch_size: int) -> list[dict]:
+    """Extract and validate the JSON list of judgments from a vision response.
+
+    Splits responsibility cleanly: `_extract_response_text` deals with
+    transport (text blocks + Markdown fence noise), this function deals
+    with shape (list of judgments with unique indices), and
+    `_validate_judgment_entry` deals with per-entry typing. Each step
+    raises `ValueError` so the orchestrator's `_recoverable_errors`
+    tuple catches every shape violation as a per-batch failure.
+    """
+    text = _extract_response_text(response)
+    decoded = json.loads(text)
+    if not isinstance(decoded, list):
+        raise ValueError(f"vision response is not a JSON list (got {type(decoded).__name__})")
+    seen_indices: set[int] = set()
+    judgments: list[dict] = []
+    for entry in decoded:
+        validated = _validate_judgment_entry(entry, batch_size=batch_size)
+        index = validated["index"]
+        if index in seen_indices:
+            raise ValueError(f"vision response has duplicate index {index}")
+        seen_indices.add(index)
+        judgments.append(validated)
+    return judgments
+
+
+def _apply_judgment(
+    candidate: _Candidate,
+    judgment: dict,
+    *,
+    language: str,
+    version: str,
+    described_at: datetime,
+) -> MediaPhotoDescribed:
+    """Build the post-transition `MediaPhotoDescribed` variant for one candidate.
+
+    Decorative entries are written with an empty description regardless
+    of what the model returned for the `description` field — the rubric
+    contract is "empty for decorative" and we enforce it at the
+    boundary so downstream code can rely on `is_decorative => not
+    description`. The pre-existing `local_path` / `width` / `height` /
+    `bytes_size` / `downloaded_at` fields are carried over verbatim
+    from the prior variant (Downloaded or Described).
+    """
+    is_decorative = bool(judgment["is_decorative"])
+    description = "" if is_decorative else str(judgment["description"])
+    return MediaPhotoDescribed(
+        url=candidate.entry.url,
+        local_path=candidate.entry.local_path,
+        width=candidate.entry.width,
+        height=candidate.entry.height,
+        bytes_size=candidate.entry.bytes_size,
+        downloaded_at=candidate.entry.downloaded_at,
+        is_decorative=is_decorative,
+        description=description,
+        description_lang=language,
+        description_version=version,
+        described_at=described_at,
+    )
+
+
+def _record_batch_failure(
+    report: DescribeReport,
+    batch: list[_Candidate],
+    error: str,
+) -> None:
+    """Mark every candidate in a failed batch as a per-photo failure.
+
+    A per-batch API error means we have no judgments for any of the
+    images in that batch — each photo stays in its current variant
+    (`MediaPhotoDownloaded` or stale `MediaPhotoDescribed`) and is
+    eligible again on the next `xbrain describe` run. The counts and
+    `per_item_failures` map reflect the per-photo unit so a partial-batch
+    success on a follow-up run can still net positive without confusing
+    the totals.
+    """
+    report.batches_failed += 1
+    for candidate in batch:
+        report.photos_failed += 1
+        report.per_item_failures.setdefault(candidate.item_id, []).append(
+            (candidate.entry.url, error)
+        )
+
+
+def describe_all(
+    items: dict[str, Item],
+    media_root: Path,
+    *,
+    model: str,
+    output_language: str,
+    description_version: str,
+    force: bool = False,
+    limit: int | None = None,
+    items_filter: list[str] | None = None,
+    batch_size: int = _DEFAULT_BATCH_SIZE,
+    client: object | None = None,
+    on_progress: Callable[[], None] | None = None,
+    now: Callable[[], datetime] | None = None,
+) -> DescribeReport:
+    """Describe every eligible photo across the store; return a structured report.
+
+    Eligibility (without `--force`):
+    - `MediaPhotoDownloaded` — always.
+    - `MediaPhotoDescribed` whose `description_version` ≠ `description_version`
+      (the per-call argument, sourced from `config.describe_version`).
+
+    With `--force`:
+    - Every `MediaPhotoDownloaded` AND every `MediaPhotoDescribed` is
+      re-described. The persisted description is overwritten.
+
+    Out of scope (every run):
+    - `MediaPhotoPending` — describing only runs over photos with bytes
+      on disk; the operator must call `xbrain media` first.
+    - `MediaPhotoFailed` — same reason.
+    - `MediaVideoPending` — photos only; vision-API support for video
+      is a separate phase.
+
+    The function mutates `items` in place; the caller is expected to
+    wrap each batch transition with a store write (the `on_progress`
+    callback fires after every batch, success or failure). The
+    Ctrl-C-coherent invariant lives there: the store is written
+    between batches, never mid-API-call.
+
+    `media_root` is the directory under which `<item_id>/<index>.<ext>`
+    photo files live (typically `data/media/`).
+
+    Raises:
+        RuntimeError: when EVERY batch attempted in the run fails. A
+            total failure (a revoked API key, an exhausted quota, a
+            corrupted media tree) must surface as a non-zero exit. The
+            CLI's `_handle_cli_errors` converts this into a clean
+            operator message + exit code 1.
+    """
+    if client is None:
+        from anthropic import Anthropic  # lazy: tests inject FakeAnthropic
+
+        client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
+    clock: Callable[[], datetime] = now if now is not None else _utcnow
+
+    started = time.monotonic()
+    report = DescribeReport()
+    filter_set = set(items_filter) if items_filter else None
+    candidates = list(
+        _iter_candidates(
+            items,
+            force=force,
+            limit=limit,
+            items_filter=filter_set,
+            current_version=description_version,
+            report=report,
+        )
+    )
+    if not candidates:
+        report.elapsed_seconds = time.monotonic() - started
+        return report
+
+    system = _system_prompt(output_language)
+    recoverable = _recoverable_errors()
+
+    for batch in _chunked(candidates, batch_size):
+        _run_one_batch(
+            batch=batch,
+            client=client,
+            model=model,
+            system=system,
+            output_language=output_language,
+            description_version=description_version,
+            clock=clock,
+            media_root=media_root,
+            recoverable=recoverable,
+            report=report,
+        )
+        if on_progress is not None:
+            on_progress()
+
+    report.elapsed_seconds = time.monotonic() - started
+    _raise_on_total_failure(report)
+    _print_partial_failure_summary(report)
+    return report
+
+
+def _run_one_batch(
+    *,
+    batch: list[_Candidate],
+    client: object,
+    model: str,
+    system: str,
+    output_language: str,
+    description_version: str,
+    clock: Callable[[], datetime],
+    media_root: Path,
+    recoverable: tuple[type[Exception], ...],
+    report: DescribeReport,
+) -> None:
+    """Execute one batch end-to-end: build, call, parse, apply, catch.
+
+    Pulled out of `describe_all` to keep the outer loop's complexity
+    under grade C while preserving the per-batch isolation contract.
+    Programmer bugs (`AttributeError`, ...) and `KeyboardInterrupt`
+    fall outside `recoverable` and propagate.
+    """
+    report.batches_attempted += 1
+    report.photos_attempted += len(batch)
+    try:
+        content_blocks = _build_user_blocks(batch, media_root)
+        response = _messages_create(
+            client=client,
+            model=model,
+            max_tokens=_MAX_TOKENS,
+            system=system,
+            content_blocks=content_blocks,
+        )
+        judgments = _parse_batch_response(response, batch_size=len(batch))
+        _apply_batch_judgments(
+            batch=batch,
+            judgments=judgments,
+            language=output_language,
+            version=description_version,
+            described_at=clock(),
+            report=report,
+        )
+    except recoverable as exc:
+        message = str(exc)
+        logger.warning("describe: batch failed (%d photos): %s", len(batch), message)
+        _record_batch_failure(report, batch, message)
+
+
+def _raise_on_total_failure(report: DescribeReport) -> None:
+    """Raise `RuntimeError` when every attempted batch failed.
+
+    A total-failure run is a non-zero-exit signal — the CLI's
+    `_handle_cli_errors` converts the raise into a clean operator
+    message + exit code 1. Pulled out so `describe_all` keeps its
+    complexity grade under C.
+    """
+    if report.photos_attempted > 0 and report.photos_described == 0:
+        raise RuntimeError(
+            f"All {report.batches_attempted} describe batches failed "
+            f"({report.photos_attempted} photos); see warnings above for details."
+        )
+
+
+def _print_partial_failure_summary(report: DescribeReport) -> None:
+    """Print the SUMMARY line on partial failure — mirrors `executors.api`.
+
+    Stays silent on a clean run (no failures = no noise) and on a
+    total-failure run (the raise above is the signal). The line shape
+    matches `xbrain.media.emit_summary_line` so log parsers can rely
+    on a single `SUMMARY:` prefix across all stages.
+    """
+    if report.photos_failed > 0:
+        print(
+            f"SUMMARY: described: {report.photos_described}, "
+            f"failed: {report.photos_failed}, "
+            f"skipped: {report.photos_skipped_already_described}",
+            file=sys.stderr,
+        )
+
+
+def _build_user_blocks(batch: list[_Candidate], media_root: Path) -> list[dict]:
+    """Build the user-turn content blocks for one batch: images + directive.
+
+    Loads each photo's bytes from disk (raises `OSError` on a missing
+    file — caught by the orchestrator's `_recoverable_errors` so the
+    whole batch is marked failed and the next run re-attempts). Each
+    image is encoded once; the directive text block follows last so the
+    rubric's "Describe images 0 through N" framing pairs with the
+    visual context above it.
+    """
+    blocks: list[dict] = []
+    for candidate in batch:
+        data = _load_bytes(media_root, candidate.entry.local_path)
+        blocks.append(_build_image_block(data, _media_type(candidate.entry.local_path)))
+    blocks.append({"type": "text", "text": _user_directive(len(batch))})
+    return blocks
+
+
+def _messages_create(
+    *,
+    client: object,
+    model: str,
+    max_tokens: int,
+    system: str,
+    content_blocks: list[dict],
+) -> object:
+    """Thin wrapper around the Anthropic SDK's `messages.create`.
+
+    Pulled out so the orchestrator stays readable and so tests can
+    inject a `FakeAnthropic` whose `messages.create` records kwargs.
+    Returns the raw response — the parser inspects `.content` blocks.
+    """
+    return client.messages.create(  # type: ignore[attr-defined]
+        model=model,
+        max_tokens=max_tokens,
+        system=system,
+        messages=[{"role": "user", "content": content_blocks}],
+    )
+
+
+def _apply_batch_judgments(
+    *,
+    batch: list[_Candidate],
+    judgments: list[dict],
+    language: str,
+    version: str,
+    described_at: datetime,
+    report: DescribeReport,
+) -> None:
+    """Apply per-image judgments to the batch, transitioning each entry.
+
+    Pairs each judgment with the candidate at its `index`. A judgment
+    count that does not match the batch size is a contract violation
+    (parser already enforces the index range) — if the list is short,
+    the missing candidates are tallied as per-photo failures so they
+    re-enter the candidate pool on the next run; if it is long, the
+    excess is a programmer bug (parser-side dup check rules out
+    duplicate indices already).
+    """
+    by_index = {entry["index"]: entry for entry in judgments}
+    if len(by_index) != len(judgments):
+        # Defence in depth — `_parse_batch_response` already rejects
+        # duplicate indices, so reaching here means the parser was
+        # bypassed. Raise so the developer sees it.
+        raise ValueError("internal: duplicate judgment indices survived parser check")
+    for position, candidate in enumerate(batch):
+        judgment = by_index.get(position)
+        if judgment is None:
+            report.photos_failed += 1
+            report.per_item_failures.setdefault(candidate.item_id, []).append(
+                (candidate.entry.url, f"vision response omitted index {position}")
+            )
+            continue
+        new_entry = _apply_judgment(
+            candidate,
+            judgment,
+            language=language,
+            version=version,
+            described_at=described_at,
+        )
+        candidate.item.media[candidate.index] = new_entry
+        report.photos_described += 1
+
+
+def emit_summary_line(report: DescribeReport, *, out: "io.IOBase | None" = None) -> None:
+    """Print the SUMMARY line on stderr (mirrors `media.emit_summary_line`).
+
+    Stays silent on a fully no-op run (no attempts, no skips) — a
+    `--limit 0` or an `--items` filter that matched nothing produces no
+    noise. `out` is injectable for tests; defaults to `sys.stderr`.
+    """
+    if report.photos_attempted == 0 and report.photos_skipped_already_described == 0:
+        return
+    target = out if out is not None else sys.stderr
+    print(
+        f"SUMMARY: described: {report.photos_described}, "
+        f"failed: {report.photos_failed}, "
+        f"skipped: {report.photos_skipped_already_described}",
+        file=target,
+    )
diff --git a/tests/test_describe.py b/tests/test_describe.py
new file mode 100644
index 0000000..a3a8ff0
--- /dev/null
+++ b/tests/test_describe.py
@@ -0,0 +1,953 @@
+"""Tests for `xbrain.describe` — the vision-describe orchestrator.
+
+The Anthropic client is faked via `tests.conftest.FakeAnthropic`; no
+real API calls. Photo bytes are written to a tmp `data/media/` tree so
+the orchestrator can read them through its normal `_load_bytes` path.
+
+Coverage targets every contract the spec calls out:
+- variant transitions (Downloaded → Described; Described stale → Described)
+- idempotency (no-op re-runs skip already-described entries)
+- batching (5-at-a-time by default; partial batch at the end)
+- per-batch error isolation (one failing batch does not abort the run)
+- total-failure raise (every batch errored)
+- refusal handling (decorative + empty description, never crashes)
+- language plumbing (the rubric ships with `{language}` substituted)
+- programmer-bug propagation (`AttributeError` is NOT swallowed)
+- Ctrl-C propagation (`KeyboardInterrupt` is NOT swallowed)
+- stale-version logic + `--force` semantics
+- summary line on stderr
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from pathlib import Path
+
+import pytest
+
+from xbrain.describe import (
+    DescribeReport,
+    _eligible,
+    _parse_batch_response,
+    _validate_judgment_entry,
+    describe_all,
+    emit_summary_line,
+)
+from xbrain.models import (
+    Author,
+    Item,
+    MediaPhotoDescribed,
+    MediaPhotoDownloaded,
+    MediaPhotoFailed,
+    MediaPhotoPending,
+    MediaVideoPending,
+)
+
+from tests.conftest import FakeAnthropic, FakeBlock, FakeResponse
+
+# --------------------------------------------------------------------- fixtures + helpers
+
+
+def _photo_bytes_jpg() -> bytes:
+    """A minimal 4x3 JPEG so the loader's `read_bytes` returns something realistic.
+
+    We do NOT exercise Pillow in describe-tests — the orchestrator only
+    reads the file and base64-encodes it. Any non-empty bytes work; we
+    use real JPEG bytes so the media-type mapping has something
+    plausible to round-trip.
+    """
+    from io import BytesIO
+
+    from PIL import Image
+
+    buf = BytesIO()
+    Image.new("RGB", (4, 3), color=(1, 2, 3)).save(buf, format="JPEG")
+    return buf.getvalue()
+
+
+def _write_photo(media_root: Path, item_id: str, index: int, ext: str = ".jpg") -> str:
+    """Write a fake photo to `data/media/<item_id>/<index><ext>`; return the rel path."""
+    rel = f"{item_id}/{index}{ext}"
+    dst = media_root / rel
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    dst.write_bytes(_photo_bytes_jpg())
+    return rel
+
+
+def _downloaded(
+    *,
+    item_id: str = "1",
+    index: int = 0,
+    url: str | None = None,
+    media_root: Path | None = None,
+) -> MediaPhotoDownloaded:
+    """Build a `MediaPhotoDownloaded` whose bytes are on disk (when `media_root`)."""
+    rel = f"{item_id}/{index}.jpg"
+    if media_root is not None:
+        _write_photo(media_root, item_id, index)
+    return MediaPhotoDownloaded(
+        url=url or f"https://pbs.twimg.com/media/{item_id}-{index}.jpg",
+        local_path=rel,
+        width=4,
+        height=3,
+        bytes_size=512,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+    )
+
+
+def _described(
+    *,
+    item_id: str = "1",
+    index: int = 0,
+    version: str = "v1",
+    media_root: Path | None = None,
+) -> MediaPhotoDescribed:
+    """Build a `MediaPhotoDescribed` with optional on-disk bytes."""
+    rel = f"{item_id}/{index}.jpg"
+    if media_root is not None:
+        _write_photo(media_root, item_id, index)
+    return MediaPhotoDescribed(
+        url=f"https://pbs.twimg.com/media/{item_id}-{index}.jpg",
+        local_path=rel,
+        width=4,
+        height=3,
+        bytes_size=512,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        is_decorative=False,
+        description="a previously-described image",
+        description_lang="English",
+        description_version=version,
+        described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
+    )
+
+
+def _item(item_id: str, media: list) -> Item:
+    """Build an `Item` populated with the given media entries."""
+    return Item(
+        id=item_id,
+        source="bookmark",
+        url=f"https://x.com/a/status/{item_id}",
+        author=Author(handle="a", name="A"),
+        text="text",
+        created_at=datetime(2026, 5, 10, tzinfo=timezone.utc),
+        captured_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        media=media,
+    )
+
+
+def _judgment(index: int, *, decorative: bool = False, description: str = "ok") -> dict:
+    """Build one per-image judgment dict matching the wire contract."""
+    return {
+        "index": index,
+        "is_decorative": decorative,
+        "description": "" if decorative else description,
+    }
+
+
+def _payload(judgments: list[dict]) -> list[dict]:
+    """Wrap a list of judgments as the JSON list the rubric expects.
+
+    `FakeAnthropic` JSON-encodes the payload as `json.dumps(payload)` —
+    a list payload survives that path unchanged because `dumps` accepts
+    any JSON-serialisable value, not just dicts. The orchestrator's
+    parser pulls the text from `.content[0].text` and parses it as a
+    JSON list.
+    """
+    return judgments  # type: ignore[return-value]
+
+
+class _FakeListResponse:
+    """A `FakeResponse`-shaped object whose `.content[0].text` is a JSON list.
+
+    `FakeAnthropic` wraps payloads in `FakeResponse(payload)` which calls
+    `json.dumps(payload)` — that path expects a dict. For describe we
+    need to ship a list payload, so we build a parallel response
+    type that mirrors the shape `_parse_batch_response` consumes.
+    """
+
+    def __init__(self, judgments: list[dict]):
+        import json
+
+        self.content = [type(FakeBlock(payload={}))(payload={})]  # placeholder block
+        # Overwrite the text on the block with the JSON list serialisation.
+        self.content[0].text = json.dumps(judgments)
+
+
+class _FakeMessagesList:
+    """A fake `client.messages` that pops `_FakeListResponse` per call.
+
+    Mirrors `tests.conftest.FakeMessages` but uses `_FakeListResponse`
+    so the payload is a JSON list. Exception instances are raised
+    rather than wrapped (same convention).
+    """
+
+    def __init__(self, payloads: list):
+        self._payloads = list(payloads)
+        self.calls: list[dict] = []
+
+    def create(self, **kwargs) -> _FakeListResponse:
+        self.calls.append(kwargs)
+        payload = self._payloads.pop(0)
+        if isinstance(payload, Exception):
+            raise payload
+        return _FakeListResponse(payload)
+
+
+class _FakeVisionClient:
+    """Drop-in fake for `anthropic.Anthropic` over JSON-list responses."""
+
+    def __init__(self, payloads: list):
+        self.messages = _FakeMessagesList(payloads)
+
+
+# --------------------------------------------------------------------- eligibility
+
+
+def test_eligible_downloaded_always_eligible():
+    """Downloaded entries are always eligible regardless of force/version."""
+    entry = _downloaded()
+    assert _eligible(entry, force=False, current_version="v1") is True
+    assert _eligible(entry, force=True, current_version="v1") is True
+
+
+def test_eligible_described_current_version_only_with_force():
+    """A described entry on the current version is skipped without --force."""
+    entry = _described(version="v1")
+    assert _eligible(entry, force=False, current_version="v1") is False
+    assert _eligible(entry, force=True, current_version="v1") is True
+
+
+def test_eligible_described_stale_version_is_eligible():
+    """A described entry on a stale version is eligible without --force."""
+    entry = _described(version="v1")
+    assert _eligible(entry, force=False, current_version="v2") is True
+
+
+def test_eligible_pending_failed_video_are_never_eligible():
+    """Pending / failed / video entries are out of scope for describe."""
+    pending = MediaPhotoPending(url="https://pbs.twimg.com/media/X.jpg")
+    failed = MediaPhotoFailed(
+        url="https://pbs.twimg.com/media/X.jpg",
+        failure_reason="http_4xx",
+        error="HTTP 404",
+        attempts=1,
+        last_attempt_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+    )
+    video = MediaVideoPending(url="https://video.twimg.com/x.mp4")
+    for entry in (pending, failed, video):
+        assert _eligible(entry, force=False, current_version="v1") is False
+        assert _eligible(entry, force=True, current_version="v1") is False
+
+
+# --------------------------------------------------------------------- parser
+
+
+def test_parse_batch_response_accepts_valid_list():
+    """A clean JSON list with the right keys round-trips."""
+    response = _FakeListResponse([_judgment(0), _judgment(1, decorative=True)])
+    out = _parse_batch_response(response, batch_size=2)
+    assert [e["index"] for e in out] == [0, 1]
+
+
+def test_parse_batch_response_strips_markdown_fence():
+    """Some models wrap JSON in ```json ... ``` despite the rubric."""
+    import json
+
+    class _Fenced:
+        content = [type("B", (), {"type": "text", "text": ""})()]
+
+    fenced_text = "```json\n" + json.dumps([_judgment(0)]) + "\n```"
+    _Fenced.content[0].text = fenced_text
+    out = _parse_batch_response(_Fenced(), batch_size=1)
+    assert out[0]["index"] == 0
+
+
+def test_parse_batch_response_rejects_non_list_root():
+    """A JSON object (not list) at the root violates the wire contract."""
+    import json
+
+    class _ObjResponse:
+        content = [type("B", (), {"type": "text", "text": json.dumps({"oops": True})})()]
+
+    with pytest.raises(ValueError, match="not a JSON list"):
+        _parse_batch_response(_ObjResponse(), batch_size=1)
+
+
+def test_parse_batch_response_rejects_missing_text_block():
+    """A response with no text block at all cannot be parsed."""
+
+    class _Empty:
+        content: list = []
+
+    with pytest.raises(ValueError, match="no text block"):
+        _parse_batch_response(_Empty(), batch_size=1)
+
+
+def test_parse_batch_response_rejects_duplicate_index():
+    """Duplicate indices would silently overwrite a transition — reject."""
+    response = _FakeListResponse([_judgment(0), _judgment(0)])
+    with pytest.raises(ValueError, match="duplicate index"):
+        _parse_batch_response(response, batch_size=2)
+
+
+def test_validate_judgment_entry_rejects_bool_as_int_index():
+    """`bool` is a subclass of `int` in Python — exclude it explicitly."""
+    with pytest.raises(ValueError, match="must be int"):
+        _validate_judgment_entry(
+            {"index": True, "is_decorative": False, "description": "x"},
+            batch_size=2,
+        )
+
+
+def test_validate_judgment_entry_rejects_out_of_range_index():
+    """An index outside `[0, batch_size)` is a contract violation."""
+    with pytest.raises(ValueError, match="out of batch range"):
+        _validate_judgment_entry(
+            {"index": 5, "is_decorative": False, "description": "x"},
+            batch_size=2,
+        )
+
+
+def test_validate_judgment_entry_rejects_non_bool_decorative():
+    """`is_decorative` must be a real bool, not a truthy string."""
+    with pytest.raises(ValueError, match="is_decorative"):
+        _validate_judgment_entry(
+            {"index": 0, "is_decorative": "yes", "description": "x"},
+            batch_size=1,
+        )
+
+
+# --------------------------------------------------------------------- orchestrator
+
+
+def test_describe_all_transitions_downloaded_to_described(tmp_path: Path):
+    """The happy path: one downloaded photo becomes one described entry."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0, description="A diagram.")]])
+    report = describe_all(
+        store,
+        media_root,
+        model="claude-sonnet-4-6",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].description == "A diagram."
+    assert item.media[0].description_lang == "English"
+    assert item.media[0].description_version == "v1"
+    assert item.media[0].is_decorative is False
+    assert report.photos_described == 1
+    assert report.photos_failed == 0
+    assert report.batches_attempted == 1
+
+
+def test_describe_all_is_noop_for_already_described_current_version(tmp_path: Path):
+    """Re-running over a described entry at the current version is a no-op."""
+    media_root = tmp_path / "media"
+    entry = _described(item_id="1", index=0, version="v1", media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    # No payloads queued: a no-op run must not call the API at all.
+    client = _FakeVisionClient([])
+    report = describe_all(
+        store,
+        media_root,
+        model="claude-sonnet-4-6",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].description == "a previously-described image"
+    assert report.photos_described == 0
+    assert report.photos_skipped_already_described == 1
+    assert report.batches_attempted == 0
+    assert client.messages.calls == []
+
+
+def test_describe_all_force_redescribes_current_version(tmp_path: Path):
+    """`--force` re-describes everything, current-version included."""
+    media_root = tmp_path / "media"
+    entry = _described(item_id="1", index=0, version="v1", media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0, description="updated description")]])
+    describe_all(
+        store,
+        media_root,
+        model="claude-sonnet-4-6",
+        output_language="English",
+        description_version="v1",
+        force=True,
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].description == "updated description"
+
+
+def test_describe_all_redescribes_stale_version_without_force(tmp_path: Path):
+    """Bumping `description_version` invalidates stale entries automatically."""
+    media_root = tmp_path / "media"
+    entry = _described(item_id="1", index=0, version="v1", media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0, description="v2 description")]])
+    describe_all(
+        store,
+        media_root,
+        model="claude-sonnet-4-6",
+        output_language="English",
+        description_version="v2",
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].description_version == "v2"
+    assert item.media[0].description == "v2 description"
+
+
+def test_describe_all_batches_five_images_per_call_by_default(tmp_path: Path):
+    """Default batch size is 5 — 6 photos must produce 2 API calls (5 + 1)."""
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(6)],
+    )
+    store = {"1": item}
+    first_batch = [_judgment(i, description=f"img {i}") for i in range(5)]
+    second_batch = [_judgment(0, description="img 5")]
+    client = _FakeVisionClient([first_batch, second_batch])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    assert len(client.messages.calls) == 2
+    assert all(isinstance(m, MediaPhotoDescribed) for m in item.media)
+
+
+def test_describe_all_respects_custom_batch_size(tmp_path: Path):
+    """A non-default `batch_size` must control how many images per call."""
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(4)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient(
+        [
+            [_judgment(0, description="x"), _judgment(1, description="x")],
+            [_judgment(0, description="x"), _judgment(1, description="x")],
+        ]
+    )
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=2,
+        client=client,
+    )
+    assert len(client.messages.calls) == 2
+
+
+def test_describe_all_decorative_judgment_writes_empty_description(tmp_path: Path):
+    """A decorative classification produces an empty description on the variant.
+
+    The orchestrator enforces the rubric contract even if the model
+    returned non-empty text — `is_decorative` implies empty description
+    at the boundary.
+    """
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    # The model put text in description despite is_decorative=True; the
+    # orchestrator must overwrite it with the empty string.
+    client = _FakeVisionClient(
+        [[{"index": 0, "is_decorative": True, "description": "shouldnt persist"}]]
+    )
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].is_decorative is True
+    assert item.media[0].description == ""
+
+
+def test_describe_all_isolates_a_batch_error(tmp_path: Path, capsys):
+    """One failing batch must NOT abort the rest of the run."""
+    from anthropic import APIError
+
+    media_root = tmp_path / "media"
+    item_a = _item("a", [_downloaded(item_id="a", index=0, media_root=media_root)])
+    item_b = _item("b", [_downloaded(item_id="b", index=0, media_root=media_root)])
+    store = {"a": item_a, "b": item_b}
+    client = _FakeVisionClient(
+        [
+            APIError("503 service unavailable", request=None, body=None),
+            [_judgment(0, description="ok")],
+        ]
+    )
+    report = describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=1,
+        client=client,
+    )
+    # Item a stayed Downloaded; item b transitioned.
+    assert isinstance(item_a.media[0], MediaPhotoDownloaded)
+    assert isinstance(item_b.media[0], MediaPhotoDescribed)
+    assert report.photos_described == 1
+    assert report.photos_failed == 1
+    assert report.batches_failed == 1
+    err = capsys.readouterr().err
+    assert "SUMMARY: described: 1, failed: 1" in err
+
+
+def test_describe_all_raises_when_every_batch_fails(tmp_path: Path):
+    """A total-failure run (every batch errored) must raise RuntimeError."""
+    from anthropic import APIError
+
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(2)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient(
+        [
+            APIError("401 unauthorized", request=None, body=None),
+            APIError("401 unauthorized", request=None, body=None),
+        ]
+    )
+    with pytest.raises(RuntimeError, match=r"All 2 describe batches failed"):
+        describe_all(
+            store,
+            media_root,
+            model="m",
+            output_language="English",
+            description_version="v1",
+            batch_size=1,
+            client=client,
+        )
+
+
+def test_describe_all_substitutes_output_language_in_system_prompt(tmp_path: Path):
+    """The rubric's `{language}` placeholder must be substituted before sending."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="Spanish",
+        description_version="v1",
+        client=client,
+    )
+    system = client.messages.calls[0]["system"]
+    assert "{language}" not in system
+    assert "Spanish" in system
+
+
+def test_describe_all_records_language_on_described_variant(tmp_path: Path):
+    """The persisted `description_lang` must reflect the call-time language."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="Spanish",
+        description_version="v1",
+        client=client,
+    )
+    assert isinstance(item.media[0], MediaPhotoDescribed)
+    assert item.media[0].description_lang == "Spanish"
+
+
+def test_describe_all_propagates_programmer_bugs(tmp_path: Path):
+    """`AttributeError` (programmer bug) must NOT be swallowed — propagate."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+
+    class _Boom:
+        class messages:  # noqa: N801
+            @staticmethod
+            def create(*_args, **_kwargs):
+                raise AttributeError("programmer bug — undefined attribute")
+
+    with pytest.raises(AttributeError, match="programmer bug"):
+        describe_all(
+            store,
+            media_root,
+            model="m",
+            output_language="English",
+            description_version="v1",
+            client=_Boom(),
+        )
+
+
+def test_describe_all_propagates_keyboard_interrupt(tmp_path: Path):
+    """Ctrl-C must propagate — falls through the narrow Exception catch."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+
+    class _CtrlC:
+        class messages:  # noqa: N801
+            @staticmethod
+            def create(*_args, **_kwargs):
+                raise KeyboardInterrupt
+
+    with pytest.raises(KeyboardInterrupt):
+        describe_all(
+            store,
+            media_root,
+            model="m",
+            output_language="English",
+            description_version="v1",
+            client=_CtrlC(),
+        )
+
+
+def test_describe_all_limit_caps_attempts(tmp_path: Path):
+    """`--limit N` must stop after N photos have been described."""
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(5)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0), _judgment(1)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        limit=2,
+        batch_size=2,
+        client=client,
+    )
+    described = sum(1 for m in item.media if isinstance(m, MediaPhotoDescribed))
+    downloaded = sum(1 for m in item.media if isinstance(m, MediaPhotoDownloaded))
+    assert described == 2
+    assert downloaded == 3
+    assert len(client.messages.calls) == 1
+
+
+def test_describe_all_items_filter_restricts_scope(tmp_path: Path):
+    """`--items <id>` must skip items not in the filter list."""
+    media_root = tmp_path / "media"
+    item_a = _item("a", [_downloaded(item_id="a", index=0, media_root=media_root)])
+    item_b = _item("b", [_downloaded(item_id="b", index=0, media_root=media_root)])
+    store = {"a": item_a, "b": item_b}
+    client = _FakeVisionClient([[_judgment(0)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        items_filter=["b"],
+        client=client,
+    )
+    assert isinstance(item_a.media[0], MediaPhotoDownloaded)
+    assert isinstance(item_b.media[0], MediaPhotoDescribed)
+
+
+def test_describe_all_handles_missing_file_as_per_batch_failure(tmp_path: Path):
+    """A missing photo file is per-photo failure, not a total abort.
+
+    The entry says "downloaded" but the bytes are not on disk
+    (operator removed `data/media/`, snapshot restored old items.json,
+    etc). The orchestrator catches the OSError, marks the whole batch
+    failed, and continues with the next batch.
+    """
+    media_root = tmp_path / "media"
+    # Entry A: bytes on disk. Entry B: bytes missing on purpose.
+    entry_a = _downloaded(item_id="a", index=0, media_root=media_root)
+    entry_b_missing = MediaPhotoDownloaded(
+        url="https://pbs.twimg.com/media/b.jpg",
+        local_path="b/0.jpg",
+        width=4,
+        height=3,
+        bytes_size=512,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+    )
+    item_a = _item("a", [entry_a])
+    item_b = _item("b", [entry_b_missing])
+    store = {"a": item_a, "b": item_b}
+    client = _FakeVisionClient([[_judgment(0)], [_judgment(0)]])
+    report = describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=1,
+        client=client,
+    )
+    assert isinstance(item_a.media[0], MediaPhotoDescribed)
+    assert isinstance(item_b.media[0], MediaPhotoDownloaded)  # stayed Downloaded
+    assert report.photos_described == 1
+    assert report.photos_failed == 1
+
+
+def test_describe_all_emits_summary_line_on_partial_failure(tmp_path: Path, capsys):
+    """Partial-failure runs emit a SUMMARY line; same convention as `media`."""
+    from anthropic import APIError
+
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(2)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient(
+        [
+            [_judgment(0)],
+            APIError("503", request=None, body=None),
+        ]
+    )
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=1,
+        client=client,
+    )
+    err = capsys.readouterr().err
+    assert "SUMMARY: described: 1, failed: 1" in err
+
+
+def test_describe_all_emits_no_summary_when_all_succeed(tmp_path: Path, capsys):
+    """A clean batch stays silent on stderr — same convention as `media`."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    err = capsys.readouterr().err
+    assert "SUMMARY:" not in err
+
+
+def test_describe_all_fires_on_progress_after_each_batch(tmp_path: Path):
+    """The Ctrl-C-coherent invariant: persistence fires between batches."""
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(3)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient(
+        [
+            [_judgment(0)],
+            [_judgment(0)],
+            [_judgment(0)],
+        ]
+    )
+    calls: list[int] = []
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=1,
+        client=client,
+        on_progress=lambda: calls.append(len(calls) + 1),
+    )
+    assert calls == [1, 2, 3]
+
+
+def test_describe_all_skips_pending_failed_and_video_silently(tmp_path: Path):
+    """Non-downloaded variants are silently out of scope — no API call, no failure."""
+    media_root = tmp_path / "media"
+    pending = MediaPhotoPending(url="https://pbs.twimg.com/media/p.jpg")
+    failed = MediaPhotoFailed(
+        url="https://pbs.twimg.com/media/f.jpg",
+        failure_reason="http_4xx",
+        error="HTTP 404",
+        attempts=1,
+        last_attempt_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+    )
+    video = MediaVideoPending(url="https://video.twimg.com/x.mp4")
+    item = _item("1", [pending, failed, video])
+    store = {"1": item}
+    client = _FakeVisionClient([])  # no calls expected
+    report = describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    assert report.photos_attempted == 0
+    assert report.photos_described == 0
+    assert report.batches_attempted == 0
+    assert client.messages.calls == []
+
+
+def test_describe_all_total_failure_does_not_emit_summary(tmp_path: Path, capsys):
+    """The all-failed branch raises BEFORE the summary print — verify."""
+    from anthropic import APIError
+
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([APIError("503", request=None, body=None)])
+    with pytest.raises(RuntimeError):
+        describe_all(
+            store,
+            media_root,
+            model="m",
+            output_language="English",
+            description_version="v1",
+            client=client,
+        )
+    err = capsys.readouterr().err
+    assert "SUMMARY:" not in err
+
+
+def test_describe_all_carries_over_local_path_dimensions(tmp_path: Path):
+    """The described variant inherits the on-disk fields verbatim — no re-decode."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    # Tweak the fields so we can prove they round-tripped intact.
+    entry = MediaPhotoDownloaded(
+        url=entry.url,
+        local_path=entry.local_path,
+        width=1920,
+        height=1080,
+        bytes_size=123456,
+        downloaded_at=entry.downloaded_at,
+    )
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0)]])
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+    )
+    described = item.media[0]
+    assert isinstance(described, MediaPhotoDescribed)
+    assert described.local_path == "1/0.jpg"
+    assert described.width == 1920
+    assert described.height == 1080
+    assert described.bytes_size == 123456
+
+
+def test_describe_all_uses_injectable_clock(tmp_path: Path):
+    """`now` injection lets tests assert the timestamp deterministically."""
+    media_root = tmp_path / "media"
+    entry = _downloaded(item_id="1", index=0, media_root=media_root)
+    item = _item("1", [entry])
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0)]])
+    fixed = datetime(2030, 1, 1, 12, tzinfo=timezone.utc)
+    describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        client=client,
+        now=lambda: fixed,
+    )
+    described = item.media[0]
+    assert isinstance(described, MediaPhotoDescribed)
+    assert described.described_at == fixed
+
+
+# --------------------------------------------------------------------- summary line
+
+
+def test_emit_summary_line_silent_when_nothing_happened(capsys):
+    """No attempts, no skips → no SUMMARY noise. Same convention as media."""
+    emit_summary_line(DescribeReport())
+    assert "SUMMARY:" not in capsys.readouterr().err
+
+
+def test_emit_summary_line_prints_when_described_or_skipped(capsys):
+    """A non-zero `photos_described` triggers the SUMMARY line."""
+    report = DescribeReport(
+        photos_attempted=1,
+        photos_described=1,
+    )
+    emit_summary_line(report)
+    err = capsys.readouterr().err
+    assert "SUMMARY: described: 1, failed: 0, skipped: 0" in err
+
+
+def test_emit_summary_line_prints_when_only_skipped(capsys):
+    """An all-skipped no-op (idempotency proof) emits the SUMMARY line too."""
+    report = DescribeReport(photos_skipped_already_described=3)
+    emit_summary_line(report)
+    err = capsys.readouterr().err
+    assert "skipped: 3" in err
+
+
+# --------------------------------------------------------------------- response helper
+
+
+def test_fake_response_block_carries_text_attribute():
+    """`tests.conftest.FakeBlock` requires both `type` and `text` attributes."""
+    block = FakeBlock(payload={"x": 1})
+    assert block.type == "text"
+    assert isinstance(block.text, str)
+
+
+def test_fake_response_round_trips():
+    """Smoke check on the `FakeResponse` machinery the tests rely on."""
+    payload = {"summary": "x", "primary_topic": "x", "topics": ["x"]}
+    response = FakeResponse(payload)
+    assert response.content[0].text
+
+
+def test_fake_anthropic_records_calls():
+    """`FakeAnthropic.messages.calls` records every kwarg dict — relied on by other tests."""
+    client = FakeAnthropic([{"x": 1}])
+    client.messages.create(model="m", system="s", messages=[])
+    assert client.messages.calls[0]["model"] == "m"

From 3c302f8646a6f52183ccbc2f1934030adec7c7ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:31:35 +0200
Subject: [PATCH 04/11] [#34] add xbrain describe CLI command + snapshot
 trigger

The CLI surface mirrors xbrain media: --force, --limit, --items,
--model, --batch-size, --verbose. The output_language and the
description_version both come from config (config.toml + new
[describe] section); --model overrides config.describe_model per
invocation. The pre-describe snapshot is auto-taken so a runaway
prompt or wrong model can be rolled back with xbrain snapshot
restore.

Persistence is double-anchored: on_progress writes the store
between batches (Ctrl-C-coherent invariant) and a finally clause
catches any total-failure RuntimeError so the per-photo
MediaPhotoDescribed records that landed before the raise are not
discarded.

config.toml.example documents the new [describe] section with
defaults (Sonnet 4.6 + version v1). 4 CLI tests cover the happy
path, no-op exit-0, --items mismatch warning, and total-failure
exit-1 propagation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 config.toml.example  |  11 ++++
 src/xbrain/cli.py    | 128 ++++++++++++++++++++++++++++++++++++++
 src/xbrain/config.py |  14 +++++
 tests/test_cli.py    | 145 +++++++++++++++++++++++++++++++++++++++++++
 tests/test_config.py |  27 ++++++++
 5 files changed, 325 insertions(+)

diff --git a/config.toml.example b/config.toml.example
index adcf202..cb5bfc7 100644
--- a/config.toml.example
+++ b/config.toml.example
@@ -39,3 +39,14 @@ language = "English"
 #   "wikilink" - **Topics:** [[ai-coding]] · [[software-engineering]]  (default)
 #   "hashtag"  - **Topics:** #ai-coding #software-engineering
 topic_style = "wikilink"
+
+[describe]
+# Vision model used by `xbrain describe`. Sonnet 4.6 is the default —
+# the quality / cost sweet spot (~$3-5 for a 2k-image corpus). Override
+# per run with `--model` while iterating; the CLI flag wins.
+model = "claude-sonnet-4-6"
+# Description-version tag persisted on every described photo. Bumping
+# this value invalidates existing descriptions: the next `xbrain
+# describe` run re-describes stale entries automatically. Use it when
+# you change the describe-image rubric or expectations.
+version = "v1"
diff --git a/src/xbrain/cli.py b/src/xbrain/cli.py
index 7227722..3d9e3be 100644
--- a/src/xbrain/cli.py
+++ b/src/xbrain/cli.py
@@ -14,6 +14,8 @@
 from xbrain import snapshot
 from xbrain.archive import parse_archive
 from xbrain.config import Config, load_config
+from xbrain.describe import describe_all as run_describe_all
+from xbrain.describe import emit_summary_line as describe_emit_summary_line
 from xbrain.diff import diff_snapshots, format_json, format_text
 from xbrain.enrich import apply_worksheet_judgments, enrich_with_executor, items_pending_enrichment
 from xbrain.executors.api import ApiExecutor
@@ -359,6 +361,132 @@ def media(
     _run_media(cfg, force=force, limit=limit, items_filter=items_filter, verbose=verbose)
 
 
+def _run_describe(
+    cfg: Config,
+    *,
+    force: bool,
+    limit: int | None,
+    items_filter: list[str] | None,
+    model: str,
+    batch_size: int,
+    verbose: bool,
+) -> None:
+    """Run the vision-describe orchestrator and persist after every batch.
+
+    Always snapshots `data/` first (the same recovery boundary as
+    `xbrain media`): a botched run — a wrong model, a runaway prompt
+    — can be undone with `xbrain snapshot restore`. Persistence
+    happens twice for the same reason as `_run_media`: once after
+    every batch (the `on_progress` callback fires between batches so
+    Ctrl-C mid-run leaves `items.json` coherent), and once
+    unconditionally at the end so the elapsed-described timestamps
+    are captured even if the orchestrator raised on total failure.
+    """
+    if items_filter:
+        target = set(items_filter)
+        store_ids = set(load_store(cfg.items_path))
+        missing = target - store_ids
+        if missing and not (target & store_ids):
+            typer.echo(
+                f"AVISO: --items {','.join(items_filter)} no coincide con ningún item "
+                f"del store ({len(store_ids)} items). El run será un no-op.",
+                err=True,
+            )
+    _auto_snapshot(cfg, "describe")
+    store = load_store(cfg.items_path)
+
+    def _persist() -> None:
+        save_store(store, cfg.items_path)
+
+    try:
+        report = run_describe_all(
+            store,
+            cfg.media_dir,
+            model=model,
+            output_language=cfg.output_language,
+            description_version=cfg.describe_version,
+            force=force,
+            limit=limit,
+            items_filter=items_filter,
+            batch_size=batch_size,
+            on_progress=_persist,
+        )
+    finally:
+        # Persist whatever transitioned, even if `describe_all` raised. A
+        # RuntimeError on total failure must not discard the per-photo
+        # MediaPhotoDescribed records that landed before the raise.
+        save_store(store, cfg.items_path)
+    describe_emit_summary_line(report)
+    typer.echo(
+        f"Describe: descritas {report.photos_described}, "
+        f"fallidas {report.photos_failed}, "
+        f"saltadas {report.photos_skipped_already_described}"
+    )
+    if verbose and report.per_item_failures:
+        typer.echo("Failed photos:", err=True)
+        for item_id, failures in sorted(report.per_item_failures.items()):
+            for url, error in failures:
+                typer.echo(f"  {item_id}  {url}  {error}", err=True)
+
+
+@app.command()
+@_handle_cli_errors
+def describe(
+    force: bool = typer.Option(
+        False,
+        "--force",
+        help="Re-describir todas las fotos, incluso las ya descritas en la versión actual.",
+    ),
+    limit: int | None = typer.Option(
+        None,
+        "--limit",
+        help="Máximo número de fotos a describir en esta ejecución.",
+    ),
+    items: str | None = typer.Option(
+        None,
+        "--items",
+        help="IDs de items separados por comas para limitar el alcance del run.",
+    ),
+    model: str | None = typer.Option(
+        None,
+        "--model",
+        help="Modelo de visión a usar. Si no se pasa, se usa el del config (`describe.model`).",
+    ),
+    batch_size: int = typer.Option(
+        5,
+        "--batch-size",
+        min=1,
+        help="Número de imágenes por llamada a la API. 5 es el sweet spot (12-15%% ahorro de tokens).",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        help="Imprime cada foto fallida (item_id, URL, error) al final del run.",
+    ),
+) -> None:
+    """Describe las fotos descargadas con un LLM de visión.
+
+    Solo describe fotos con bytes en disco (`MediaPhotoDownloaded`).
+    Las entradas ya descritas en la versión actual se saltan; bumpear
+    `[describe].version` en `config.toml` fuerza un re-describe
+    automático sin `--force`. Las descripciones se persisten en
+    `items.json` y son consumidas por `xbrain enrich` y `xbrain topics`
+    en las llamadas LLM subsiguientes.
+    """
+    cfg = _config()
+    items_filter = [s.strip() for s in items.split(",") if s.strip()] if items else None
+    chosen_model = model or cfg.describe_model
+    _run_describe(
+        cfg,
+        force=force,
+        limit=limit,
+        items_filter=items_filter,
+        model=chosen_model,
+        batch_size=batch_size,
+        verbose=verbose,
+    )
+
+
 @app.command()
 @_handle_cli_errors
 def enrich(
diff --git a/src/xbrain/config.py b/src/xbrain/config.py
index 06568da..69c2f6a 100644
--- a/src/xbrain/config.py
+++ b/src/xbrain/config.py
@@ -29,6 +29,17 @@ class Config:
     topics_resynth_threshold: int
     output_language: str  # one of xbrain.i18n.SUPPORTED_LANGUAGES
     topic_style: str  # one of xbrain.config.SUPPORTED_TOPIC_STYLES
+    # `describe_model` defaults to Sonnet 4.6 — the spec settled on it as the
+    # quality / cost sweet spot for vision (~$3-5 for a 2k-image corpus).
+    # Override per run via `xbrain describe --model ...` when iterating on
+    # prompt or budget; the CLI flag wins over the config value.
+    describe_model: str
+    # `describe_version` tags every produced description so a prompt
+    # evolution can be rolled out incrementally: bumping the value here
+    # makes the next `xbrain describe` run re-describe stale entries
+    # automatically (no `--force` needed). The string is exact-match —
+    # there is no ordering relation, only equality.
+    describe_version: str
 
     @property
     def items_path(self) -> Path:
@@ -95,6 +106,7 @@ def load_config(repo_root: Path) -> Config:
             f"config.toml: [output].topic_style must be one of "
             f"{list(SUPPORTED_TOPIC_STYLES)}, got {topic_style!r}"
         )
+    describe = settings.get("describe", {})
     return Config(
         repo_root=repo_root,
         vault=vault,
@@ -107,4 +119,6 @@ def load_config(repo_root: Path) -> Config:
         topics_resynth_threshold=resynth_threshold,
         output_language=output_language,
         topic_style=topic_style,
+        describe_model=describe.get("model", "claude-sonnet-4-6"),
+        describe_version=describe.get("version", "v1"),
     )
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c4f3cc2..d36d04b 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -834,3 +834,148 @@ def test_vocab_rejects_unknown_executor(tmp_path, monkeypatch):
     result = runner.invoke(app, ["vocab", "--executor", "bogus"])
     assert result.exit_code == 1
     assert "bogus" in result.output
+
+
+# ----------------------------------------------------------------------- describe
+
+
+def _setup_describe_repo(tmp_path: Path, monkeypatch) -> Path:
+    """Like `_setup_repo` but with a pre-populated photo on disk + Downloaded variant."""
+    import io as _io
+    from datetime import datetime, timezone
+
+    from PIL import Image
+
+    from xbrain.models import Author, Item, MediaPhotoDownloaded
+
+    _setup_repo(tmp_path, monkeypatch)
+    media_root = tmp_path / "data" / "media"
+    (media_root / "42").mkdir(parents=True, exist_ok=True)
+    buffer = _io.BytesIO()
+    Image.new("RGB", (8, 6), color=(10, 20, 30)).save(buffer, format="JPEG")
+    (media_root / "42" / "0.jpg").write_bytes(buffer.getvalue())
+
+    item = Item(
+        id="42",
+        source="bookmark",
+        url="https://x.com/a/status/42",
+        author=Author(handle="a", name="A"),
+        text="text",
+        created_at=datetime(2026, 5, 10, tzinfo=timezone.utc),
+        captured_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        media=[
+            MediaPhotoDownloaded(
+                url="https://pbs.twimg.com/media/42-0.jpg",
+                local_path="42/0.jpg",
+                width=8,
+                height=6,
+                bytes_size=200,
+                downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            )
+        ],
+    )
+    save_store({"42": item}, tmp_path / "data" / "items.json")
+    return tmp_path
+
+
+def test_describe_command_transitions_downloaded_to_described(tmp_path: Path, monkeypatch):
+    """End-to-end: a Downloaded photo becomes Described via the CLI command."""
+    import json as _json
+
+    from xbrain.models import MediaPhotoDescribed
+    from xbrain.store import load_store
+
+    _setup_describe_repo(tmp_path, monkeypatch)
+
+    class _FakeBlock:
+        type = "text"
+
+        def __init__(self, text: str):
+            self.text = text
+
+    class _FakeResp:
+        def __init__(self, text: str):
+            self.content = [_FakeBlock(text)]
+
+    class _FakeMessages:
+        def __init__(self):
+            self.calls: list[dict] = []
+
+        def create(self, **kwargs):
+            self.calls.append(kwargs)
+            payload = _json.dumps([{"index": 0, "is_decorative": False, "description": "A chart."}])
+            return _FakeResp(payload)
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _FakeMessages()
+
+    import xbrain.cli as cli
+
+    fake_client = _FakeClient()
+
+    def _patched_run(store, media_root, **kwargs):
+        kwargs["client"] = fake_client
+        return _orig(store, media_root, **kwargs)
+
+    _orig = cli.run_describe_all
+    monkeypatch.setattr(cli, "run_describe_all", _patched_run)
+
+    result = runner.invoke(app, ["describe"])
+    assert result.exit_code == 0, result.output
+    reloaded = load_store(tmp_path / "data" / "items.json")
+    entry = reloaded["42"].media[0]
+    assert isinstance(entry, MediaPhotoDescribed)
+    assert entry.description == "A chart."
+    assert entry.description_lang == "English"
+    assert entry.description_version == "v1"
+
+
+def test_describe_command_runs_on_empty_store(tmp_path: Path, monkeypatch):
+    """No items → describe is a no-op exit-0, with a snapshot still taken."""
+    _setup_repo(tmp_path, monkeypatch)
+    result = runner.invoke(app, ["describe"])
+    assert result.exit_code == 0
+    # A snapshot was created — recovery boundary mirrors media.
+    snapshots = list((tmp_path / "data" / "snapshots").glob("*-describe"))
+    assert snapshots, "describe must auto-snapshot data/ before running"
+
+
+def test_describe_command_warns_when_items_filter_matches_nothing(tmp_path, monkeypatch):
+    """`--items` with no matches surfaces an AVISO line — same pattern as media."""
+    _setup_describe_repo(tmp_path, monkeypatch)
+    result = runner.invoke(app, ["describe", "--items", "no-such-id"])
+    assert result.exit_code == 0
+    assert "AVISO" in result.output or "AVISO" in (result.stderr or "")
+
+
+def test_describe_command_propagates_total_failure_as_exit_1(tmp_path, monkeypatch):
+    """A total-failure RuntimeError surfaces as exit code 1 via `_handle_cli_errors`."""
+    from anthropic import APIError
+
+    _setup_describe_repo(tmp_path, monkeypatch)
+
+    class _AlwaysFailingMessages:
+        def __init__(self):
+            self.calls: list[dict] = []
+
+        def create(self, **kwargs):
+            self.calls.append(kwargs)
+            raise APIError("401 unauthorized", request=None, body=None)
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _AlwaysFailingMessages()
+
+    import xbrain.cli as cli
+
+    def _patched(store, media_root, **kwargs):
+        kwargs["client"] = _FakeClient()
+        return _orig(store, media_root, **kwargs)
+
+    _orig = cli.run_describe_all
+    monkeypatch.setattr(cli, "run_describe_all", _patched)
+
+    result = runner.invoke(app, ["describe"])
+    assert result.exit_code == 1
+    assert "Error" in result.output
diff --git a/tests/test_config.py b/tests/test_config.py
index 5a2ddbe..752dea5 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -195,3 +195,30 @@ def test_load_config_rejects_unknown_topic_style(tmp_path: Path):
     )
     with pytest.raises(ValueError, match="topic_style"):
         load_config(tmp_path)
+
+
+def test_load_config_describe_settings_have_defaults(tmp_path: Path):
+    """No [describe] section → Sonnet 4.6 + version v1 (the spec defaults)."""
+    _write_repo(tmp_path)
+    cfg = load_config(tmp_path)
+    assert cfg.describe_model == "claude-sonnet-4-6"
+    assert cfg.describe_version == "v1"
+
+
+def test_load_config_round_trips_describe_overrides(tmp_path: Path):
+    """[describe] section overrides — operators can pin a different model + version."""
+    (tmp_path / "config.toml").write_text(
+        "[paths]\n"
+        'vault = "/tmp/vault"\n'
+        'output_subdir = "learnings/x-knowledge"\n'
+        'data_dir = "data"\n'
+        "[x]\n"
+        'handle = "vgonpa"\n'
+        "[describe]\n"
+        'model = "claude-opus-4-1"\n'
+        'version = "v3"\n',
+        encoding="utf-8",
+    )
+    cfg = load_config(tmp_path)
+    assert cfg.describe_model == "claude-opus-4-1"
+    assert cfg.describe_version == "v3"

From 49287ac4ddc084297b451592948fe5aef8101187 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:35:12 +0200
Subject: [PATCH 05/11] [#34] integrate image descriptions into enrich + topics
 prompts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The enrich-time prompt in executors/api.py now carries an
"Images in this post:" section listing every non-decorative
MediaPhotoDescribed entry's prose, sitting between the post body
and the links/article block so the LLM reads context in natural
order. Decorative photos are filtered at the seam — they introduce
no topic noise.

TopicInput grows an image_descriptions field; build_topic_inputs
flattens content-bearing descriptions across every post in the
topic and feeds them to the topic-synth prompt below the
per-post summaries. Same decorative-filter as enrich. The prompt
shape stays back-compat: items / topics without described photos
emit no images section at all.

8 new tests guard the integration: inclusion, decorative-filter,
omission-when-empty, ordering vs links/article. Both _user_prompt
helpers were split into per-section builders to keep complexity
under grade C — no new C functions introduced.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/xbrain/executors/api.py | 84 +++++++++++++++++++++++++++++--------
 src/xbrain/topic_synth.py   | 24 ++++++++++-
 src/xbrain/topics.py        | 43 +++++++++++++++----
 tests/test_executors_api.py | 62 +++++++++++++++++++++++++++
 tests/test_topic_synth.py   | 25 +++++++++++
 tests/test_topics.py        | 54 ++++++++++++++++++++++++
 6 files changed, 265 insertions(+), 27 deletions(-)

diff --git a/src/xbrain/executors/api.py b/src/xbrain/executors/api.py
index 2b18138..4f9c26e 100644
--- a/src/xbrain/executors/api.py
+++ b/src/xbrain/executors/api.py
@@ -13,7 +13,7 @@
 
 from xbrain.executors.base import EnrichmentJudgment
 from xbrain.llm_json import json_from_response
-from xbrain.models import ContentSourceSuccess, Item, Topic
+from xbrain.models import ContentSourceSuccess, Item, MediaPhotoDescribed, Topic
 from xbrain.rubrics import ARTICLE_CHAR_LIMIT, load_rubric
 
 _MAX_TOKENS = 600
@@ -60,6 +60,69 @@ def _system_prompt(language: str) -> str:
     )
 
 
+def _content_image_descriptions(item: Item) -> list[str]:
+    """Return non-decorative image descriptions on the item, in media order.
+
+    Decorative photos (`is_decorative=True`) are filtered out at this
+    seam so they introduce no topic noise — an avatar or a reaction
+    meme would otherwise drag the assigned topics toward whatever the
+    image happened to depict. Items without described photos return an
+    empty list.
+    """
+    return [
+        entry.description
+        for entry in item.media
+        if isinstance(entry, MediaPhotoDescribed)
+        and not entry.is_decorative
+        and entry.description
+    ]
+
+
+def _images_section(item: Item) -> list[str]:
+    """Build the `Images in this post:` block, or an empty list when not applicable.
+
+    Visual content carries topic signal too. The describe stage
+    already filtered decoratives — this just splices the prose in
+    right before the article body so the LLM reads the post + the
+    image evidence + the article in natural order.
+    """
+    image_descriptions = _content_image_descriptions(item)
+    if not image_descriptions:
+        return []
+    lines = ["", "Images in this post:"]
+    lines += [f"- {description}" for description in image_descriptions]
+    return lines
+
+
+def _links_section(item: Item) -> list[str]:
+    """Build the `Links in the post:` block, or an empty list when not applicable."""
+    if not item.links:
+        return []
+    lines = [
+        "",
+        "Links in the post (the domain is topic signal even when "
+        "the article body is unavailable):",
+    ]
+    lines += [f"- {ln.url}  (domain: {ln.domain})" for ln in item.links]
+    return lines
+
+
+def _article_sections(item: Item) -> list[str]:
+    """Build one block per successfully-fetched article. Empty if no content."""
+    if item.content is None or not item.content.sources:
+        return []
+    lines: list[str] = []
+    for src in item.content.sources:
+        # Narrow to the success variant — only those carry `title`/`text`.
+        if isinstance(src, ContentSourceSuccess) and src.text:
+            lines += [
+                "",
+                f"Linked article ({src.title or src.url}):",
+                src.text[:ARTICLE_CHAR_LIMIT],
+            ]
+    return lines
+
+
 def _user_prompt(item: Item, vocab: list[Topic]) -> str:
     parts = [
         "Controlled vocabulary (use only these slugs):",
@@ -70,22 +133,9 @@ def _user_prompt(item: Item, vocab: list[Topic]) -> str:
     ]
     if item.bookmark_folder:
         parts += ["", f"Saved by the user in the bookmark folder: {item.bookmark_folder}"]
-    if item.links:
-        parts += [
-            "",
-            "Links in the post (the domain is topic signal even when "
-            "the article body is unavailable):",
-        ]
-        parts += [f"- {ln.url}  (domain: {ln.domain})" for ln in item.links]
-    if item.content and item.content.sources:
-        # Narrow to the success variant — only those carry `title`/`text`.
-        for src in item.content.sources:
-            if isinstance(src, ContentSourceSuccess) and src.text:
-                parts += [
-                    "",
-                    f"Linked article ({src.title or src.url}):",
-                    src.text[:ARTICLE_CHAR_LIMIT],
-                ]
+    parts += _images_section(item)
+    parts += _links_section(item)
+    parts += _article_sections(item)
     return "\n".join(parts)
 
 
diff --git a/src/xbrain/topic_synth.py b/src/xbrain/topic_synth.py
index 94a7b41..be5243a 100644
--- a/src/xbrain/topic_synth.py
+++ b/src/xbrain/topic_synth.py
@@ -9,7 +9,7 @@
 
 import json
 import sys
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
 
@@ -37,11 +37,21 @@ class OverviewJudgment(BaseModel):
 
 @dataclass
 class TopicInput:
-    """Everything an executor needs to synthesize one topic's overview."""
+    """Everything an executor needs to synthesize one topic's overview.
+
+    `summaries` is the per-post text already enrichment produced.
+    `image_descriptions` is the vision-LLM-produced prose for every
+    non-decorative photo in the topic's posts — fed to the
+    topic-synthesis LLM alongside the summaries so the topic page
+    reflects visual evidence that the per-post enrichment may have
+    only partially captured. Decorative photos are excluded at the
+    seam (mirrors `xbrain.executors.api._content_image_descriptions`).
+    """
 
     slug: str
     description: str
     summaries: list[str]
+    image_descriptions: list[str] = field(default_factory=list)
 
 
 def _system_prompt(language: str) -> str:
@@ -64,6 +74,16 @@ def _user_prompt(topic: TopicInput) -> str:
         f"Summaries of the {len(topic.summaries)} posts in this topic:",
     ]
     lines += [f"- {summary}" for summary in topic.summaries]
+    if topic.image_descriptions:
+        # Visual evidence supplements the per-post summaries: a chart
+        # or screenshot in a topic carries signal the summary may have
+        # only hinted at. Decorative photos are filtered out by the
+        # caller (`build_topic_inputs`) so they introduce no noise.
+        lines += [
+            "",
+            f"Images across the {len(topic.image_descriptions)} content-bearing photos in this topic:",
+        ]
+        lines += [f"- {description}" for description in topic.image_descriptions]
     return "\n".join(lines)
 
 
diff --git a/src/xbrain/topics.py b/src/xbrain/topics.py
index cdfeb32..0efa49f 100644
--- a/src/xbrain/topics.py
+++ b/src/xbrain/topics.py
@@ -13,7 +13,7 @@
 
 from xbrain import notes_io
 from xbrain.i18n import Strings, strings_for
-from xbrain.models import Item, Topic, TopicPage
+from xbrain.models import Item, MediaPhotoDescribed, Topic, TopicPage
 from xbrain.topic_synth import OverviewJudgment, TopicInput
 
 
@@ -172,10 +172,34 @@ def topics_needing_synth(
     return needing
 
 
+def _collect_summaries(item_pool: list[Item]) -> list[str]:
+    """Flatten the non-empty `enriched.summary` strings across a pool of items."""
+    return [
+        item.enriched.summary for item in item_pool if item.enriched and item.enriched.summary
+    ]
+
+
+def _collect_image_descriptions(item_pool: list[Item]) -> list[str]:
+    """Flatten content-bearing image descriptions across a pool of items.
+
+    Decorative photos are filtered out at this seam so the topic prompt
+    never carries avatar / reaction-meme noise — same filter the enrich
+    prompt applies in `xbrain.executors.api`.
+    """
+    return [
+        entry.description
+        for item in item_pool
+        for entry in item.media
+        if isinstance(entry, MediaPhotoDescribed)
+        and not entry.is_decorative
+        and entry.description
+    ]
+
+
 def build_topic_inputs(
     slugs: list[str], vocab: list[Topic], all_posts: dict[str, TopicPosts]
 ) -> list[TopicInput]:
-    """Build the synthesis input for each slug — its description + post summaries."""
+    """Build the synthesis input for each slug — description + summaries + image descriptions."""
     by_slug = {topic.slug: topic for topic in vocab}
     inputs: list[TopicInput] = []
     for slug in slugs:
@@ -183,12 +207,15 @@ def build_topic_inputs(
         if topic is None:
             raise ValueError(f"slug '{slug}' is not in the vocabulary")
         posts = all_posts.get(slug, TopicPosts())
-        summaries = [
-            item.enriched.summary
-            for item in (posts.primary + posts.also)
-            if item.enriched and item.enriched.summary
-        ]
-        inputs.append(TopicInput(slug=slug, description=topic.description, summaries=summaries))
+        item_pool = posts.primary + posts.also
+        inputs.append(
+            TopicInput(
+                slug=slug,
+                description=topic.description,
+                summaries=_collect_summaries(item_pool),
+                image_descriptions=_collect_image_descriptions(item_pool),
+            )
+        )
     return inputs
 
 
diff --git a/tests/test_executors_api.py b/tests/test_executors_api.py
index c022aad..ca3dd47 100644
--- a/tests/test_executors_api.py
+++ b/tests/test_executors_api.py
@@ -194,3 +194,65 @@ def test_api_executor_emits_no_summary_when_all_succeed(capsys):
     ex = ApiExecutor(model="m", output_language="English", client=client)
     ex.enrich_items([_item("1")], VOCAB)
     assert "SUMMARY:" not in capsys.readouterr().err
+
+
+def _described_photo(*, description: str, decorative: bool = False):
+    """Build a `MediaPhotoDescribed` for prompt-integration tests."""
+    from datetime import datetime, timezone
+
+    from xbrain.models import MediaPhotoDescribed
+
+    return MediaPhotoDescribed(
+        url="https://pbs.twimg.com/media/X.jpg",
+        local_path="1/0.jpg",
+        width=4,
+        height=3,
+        bytes_size=512,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        is_decorative=decorative,
+        description=description,
+        description_lang="English",
+        description_version="v1",
+        described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
+    )
+
+
+def test_user_prompt_includes_content_bearing_image_descriptions():
+    """Non-decorative described photos must surface as `Images in this post:` lines."""
+    item = _item("1", media=[_described_photo(description="A chart of MMLU scores.")])
+    prompt = _user_prompt(item, VOCAB)
+    assert "Images in this post:" in prompt
+    assert "A chart of MMLU scores." in prompt
+
+
+def test_user_prompt_excludes_decorative_image_descriptions():
+    """Decorative photos must NOT appear in the prompt — pure noise."""
+    item = _item("1", media=[_described_photo(description="ignored", decorative=True)])
+    prompt = _user_prompt(item, VOCAB)
+    assert "Images in this post:" not in prompt
+
+
+def test_user_prompt_omits_images_section_when_no_described_photos():
+    """Items without any described photos must NOT have the images section.
+
+    Otherwise the LLM sees a hint of missing context and may hallucinate
+    visual evidence that does not exist. Regression guard.
+    """
+    item = _item("1")  # no media
+    prompt = _user_prompt(item, VOCAB)
+    assert "Images in this post:" not in prompt
+
+
+def test_user_prompt_image_descriptions_precede_links_and_article():
+    """Image section sits between the post body and the links/article."""
+    from xbrain.models import Link
+
+    item = _item(
+        "1",
+        media=[_described_photo(description="A diagram of GraphQL caching.")],
+        links=[Link(url="https://example.com/x", domain="example.com")],
+    )
+    prompt = _user_prompt(item, VOCAB)
+    image_idx = prompt.index("Images in this post:")
+    links_idx = prompt.index("Links in the post")
+    assert image_idx < links_idx
diff --git a/tests/test_topic_synth.py b/tests/test_topic_synth.py
index 67d4fdd..fb2c770 100644
--- a/tests/test_topic_synth.py
+++ b/tests/test_topic_synth.py
@@ -229,3 +229,28 @@ def test_synthesize_overviews_api_emits_no_summary_when_all_succeed(capsys):
         client=client,
     )
     assert "SUMMARY:" not in capsys.readouterr().err
+
+
+def test_user_prompt_includes_image_descriptions_when_provided():
+    """When `image_descriptions` is non-empty, the prompt carries an `Images` section."""
+    from xbrain.topic_synth import _user_prompt
+
+    topic_input = TopicInput(
+        slug="ai-coding",
+        description="LLMs writing software",
+        summaries=["s1"],
+        image_descriptions=["A flowchart of a feedback loop.", "Code in a terminal."],
+    )
+    prompt = _user_prompt(topic_input)
+    assert "Images across the 2 content-bearing photos" in prompt
+    assert "A flowchart of a feedback loop." in prompt
+    assert "Code in a terminal." in prompt
+
+
+def test_user_prompt_omits_image_section_when_empty():
+    """Default empty `image_descriptions` produces no image section — regression guard."""
+    from xbrain.topic_synth import _user_prompt
+
+    topic_input = TopicInput(slug="x", description="d", summaries=["s"])
+    prompt = _user_prompt(topic_input)
+    assert "Images across" not in prompt
diff --git a/tests/test_topics.py b/tests/test_topics.py
index 3e59dc4..75db3a3 100644
--- a/tests/test_topics.py
+++ b/tests/test_topics.py
@@ -197,6 +197,60 @@ def test_build_topic_inputs_rejects_an_unknown_slug():
         build_topic_inputs(["not-a-real-topic"], _VOCAB, {})
 
 
+def _described_photo_in_topic(*, description: str, decorative: bool = False):
+    """Build a `MediaPhotoDescribed` for the topic-inputs filtering test."""
+    from datetime import datetime, timezone
+
+    from xbrain.models import MediaPhotoDescribed
+
+    return MediaPhotoDescribed(
+        url="https://pbs.twimg.com/media/X.jpg",
+        local_path="1/0.jpg",
+        width=4,
+        height=3,
+        bytes_size=512,
+        downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        is_decorative=decorative,
+        description=description,
+        description_lang="English",
+        description_version="v1",
+        described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
+    )
+
+
+def test_build_topic_inputs_collects_content_image_descriptions():
+    """Content-bearing photos contribute their descriptions; decoratives are filtered."""
+    from xbrain.topics import build_topic_inputs
+
+    primary_item = _enriched("1", "ai-coding", ["ai-coding"])
+    primary_item.media = [
+        _described_photo_in_topic(description="A chart of model accuracy."),
+        _described_photo_in_topic(description="ignored", decorative=True),
+    ]
+    also_item = _enriched("2", "career", ["career", "ai-coding"])
+    also_item.media = [
+        _described_photo_in_topic(description="A screenshot of a CI dashboard."),
+    ]
+    posts = {"ai-coding": TopicPosts()}
+    posts["ai-coding"].primary.append(primary_item)
+    posts["ai-coding"].also.append(also_item)
+    inputs = build_topic_inputs(["ai-coding"], _VOCAB, posts)
+    assert sorted(inputs[0].image_descriptions) == [
+        "A chart of model accuracy.",
+        "A screenshot of a CI dashboard.",
+    ]
+
+
+def test_build_topic_inputs_empty_when_no_described_photos():
+    """An item without any described media contributes an empty image list."""
+    from xbrain.topics import build_topic_inputs
+
+    posts = {"ai-coding": TopicPosts()}
+    posts["ai-coding"].primary.append(_enriched("1", "ai-coding", ["ai-coding"]))
+    inputs = build_topic_inputs(["ai-coding"], _VOCAB, posts)
+    assert inputs[0].image_descriptions == []
+
+
 def test_compute_topic_posts_dedups_duplicate_slugs_in_topics():
     # A store record with a duplicate slug in `enriched.topics` (pre-validation
     # data) must place the item once, not twice, in the also-relevant list.

From 110b3659e871c980a723ab34e88cdea1a42c1bad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:36:18 +0200
Subject: [PATCH 06/11] [#34] extend xbrain diff with described counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The diff module already grew the described column + delta_described
field in the model commit. This commit adds the two regression tests
that prove the diff surfaces a downloaded → described transition
between snapshots and that the text renderer emits the new
"described:" row alongside the existing four.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/test_diff.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/test_diff.py b/tests/test_diff.py
index 6b28838..c708b7d 100644
--- a/tests/test_diff.py
+++ b/tests/test_diff.py
@@ -581,6 +581,72 @@ def test_diff_media_counts_video_pending_separately(tmp_path: Path) -> None:
     assert report.media.delta_video_pending == 1
 
 
+def test_diff_media_reports_delta_described(tmp_path: Path) -> None:
+    """`xbrain describe` advances downloaded → described; the diff surfaces +N described.
+
+    Setup: snapshot A has 1 downloaded photo. Snapshot B has the same
+    URL transitioned to described (same on-disk bytes, plus the new
+    description payload). The transition shows up as
+    `delta_downloaded=-1, delta_described=+1`.
+    """
+    from xbrain.models import MediaPhotoDescribed, MediaPhotoDownloaded
+
+    a = tmp_path / "a"
+    b = tmp_path / "b"
+    item_a = _item_with_media(
+        "1",
+        [
+            MediaPhotoDownloaded(
+                url="https://pbs.twimg.com/media/A.png",
+                local_path="1/0.png",
+                width=10,
+                height=8,
+                bytes_size=100,
+                downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            )
+        ],
+    )
+    item_b = _item_with_media(
+        "1",
+        [
+            MediaPhotoDescribed(
+                url="https://pbs.twimg.com/media/A.png",
+                local_path="1/0.png",
+                width=10,
+                height=8,
+                bytes_size=100,
+                downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+                is_decorative=False,
+                description="A chart of accuracy by model.",
+                description_lang="English",
+                description_version="v1",
+                described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
+            )
+        ],
+    )
+    _seed(a, items={"1": item_a}, vocab_slugs=[])
+    _seed(b, items={"1": item_b}, vocab_slugs=[])
+
+    report = diff_snapshots(a, b)
+    assert report.media.a.downloaded == 1
+    assert report.media.a.described == 0
+    assert report.media.b.downloaded == 0
+    assert report.media.b.described == 1
+    assert report.media.delta_downloaded == -1
+    assert report.media.delta_described == 1
+    assert report.summary.media_delta_described == 1
+
+
+def test_diff_media_text_format_includes_described_row(tmp_path: Path) -> None:
+    """The text renderer emits a `described:` row alongside the others."""
+    a = tmp_path / "a"
+    b = tmp_path / "b"
+    _seed(a, items={"1": _item_with_media("1", [])}, vocab_slugs=[])
+    _seed(b, items={"1": _item_with_media("1", [])}, vocab_slugs=[])
+    text = format_text(diff_snapshots(a, b))
+    assert "described:" in text
+
+
 def test_diff_media_text_format_includes_block(tmp_path: Path) -> None:
     """The text renderer includes a `MEDIA` block with the four counters."""
     from xbrain.models import MediaPhotoDownloaded

From d71a30a55a92d2d9f7337c8c28a3a38810f4c2ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 19:38:42 +0200
Subject: [PATCH 07/11] [#34] update README + ARCHITECTURE for the describe
 stage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

README grows a Commands row for describe, a Configuration row for
[describe].model and [describe].version, and a Vision descriptions
subsection in the Local media storage section that explains the
$3-5 budget, the decorative-filter, and the description-version
re-describe trigger.

ARCHITECTURE adds a Per-stage describe section between fetch and
vocab covering the state machine, batching, refusal handling,
failure isolation, snapshot trigger, and the consumption seam in
executors/api.py + topic_synth.py. The rubrics table grows a
describe-image row; invariant #10 names the fifth Media variant and
makes the linear-pipeline shape explicit (Pending → Downloaded →
Described, with Failed as the off-ramp from Pending).

Also bundles two formatter-only cleanups picked up by ruff format
on the executors/api.py and topics.py changes from the prompt
integration commit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ARCHITECTURE.md             | 34 ++++++++++++++++++++++++++++++++--
 README.md                   | 29 ++++++++++++++++++++++++++++-
 src/xbrain/executors/api.py |  7 ++-----
 src/xbrain/topics.py        |  8 ++------
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index b4fa467..4bdbc15 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -266,6 +266,35 @@ The numbered stages above are summarised; the sections below cover each one in d
 
 **Snapshot trigger.** `xbrain media` always snapshots `data/` first (label `pre-media`), mirroring the destructive-op recovery boundary. The snapshot covers `items.json` / `state.json` / `vocab.yaml` / `topics.json` only — the binary photo bytes under `data/media/` are NOT included; re-downloading via `xbrain media` is the recovery path.
 
+### describe
+
+**What it does.** Sends every downloaded photo to a Claude vision model, asks for a 1-3 sentence prose description plus a `is_decorative` classification, and persists the prose on the entry. The entry transitions from `MediaPhotoDownloaded` to `MediaPhotoDescribed` (a fifth variant on the `MediaEntry` union, on top of the four `media` already exposes). Decorative photos (avatars, reaction memes, abstract backgrounds) are classified as such with an empty description so downstream prompts can filter them out without re-classifying.
+
+**Reads.** `data/items.json` + `data/media/<id>/<n>.<ext>` (the bytes the downloader wrote).
+
+**Writes.** `data/items.json` — each described photo entry carries `is_decorative` + `description` + `description_lang` + `description_version` + `described_at`. No new on-disk binary state; the bytes from the prior `MediaPhotoDownloaded` are inherited verbatim.
+
+**State machine.** Each `xbrain describe` run advances eligible photo entries:
+- `Downloaded` → `Described` (description on the entry, bytes unchanged).
+- `Described` (stale version) → `Described` (current version), automatically.
+- `Described` (current version) → no-op (skipped) unless `--force`.
+
+Eligibility ignores `Pending` / `Failed` / `VideoPending`: describe only runs on photos with bytes on disk. The description-version tag is the rubric-evolution lever: bumping `[describe].version` in `config.toml` invalidates persisted entries so the next run re-describes them without `--force`.
+
+**Batching.** Default batch size is 5 images per API call (the spec's quality / cost sweet spot — ~12-15 % token saving vs per-image, modest added complexity). Override with `--batch-size N`.
+
+**Refusals.** Vision refusals (faces, NSFW) are NOT a hard failure: the entry is persisted as decorative with an empty description, and the run continues. The same `is_decorative` flag downstream consumers already use for "no topic signal" handles the refusal uniformly.
+
+**Failure isolation.** Per-batch error isolation: one failing API call does not abort the run. A total-failure run (every batch errored) raises `RuntimeError` so the CLI surfaces non-zero exit. The orchestrator's `on_progress` callback writes `items.json` between batches so Ctrl-C mid-run leaves the store coherent — same recovery contract as `media`.
+
+**Snapshot trigger.** `xbrain describe` always snapshots `data/` first (label `pre-describe`), mirroring `media`'s recovery boundary. A botched run — wrong model, runaway prompt — can be undone with `xbrain snapshot restore`.
+
+**Feeds the LLM stages.** Once described, the prose is consumed automatically:
+- `xbrain enrich` (in `executors/api.py:_user_prompt`) splices an `Images in this post:` section between the post body and the links/article block when the item has content-bearing described photos. Decoratives are filtered.
+- `xbrain topics` (in `topic_synth.py:_user_prompt`) appends the flat list of content-bearing image descriptions across every post in a topic, after the per-post summaries.
+
+This is how a tweet that is mostly a screenshot of a paper becomes searchable by what the screenshot was actually about.
+
 ### vocab
 
 **What it does.** Induces a closed taxonomy of ~30-45 topics from the whole corpus. Map step: chunks the corpus, asks an LLM to propose candidate topics per chunk. Reduce step: asks the LLM to consolidate the union of candidates down to `vocab.target_count` topics. Always includes a `misc` topic for posts with no thematic core.
@@ -333,7 +362,7 @@ Everything XBrain knows lives in four files inside `data/` (gitignored). They ar
 
 | File | Format | What it is | Mutated by |
 |------|--------|------------|------------|
-| `items.json` | JSON array of `Item` | The source of truth — every post XBrain has ever seen, with all fetched content and enrichment | `extract`, `fetch`, `enrich`, `media` |
+| `items.json` | JSON array of `Item` | The source of truth — every post XBrain has ever seen, with all fetched content, enrichment, and per-photo vision descriptions | `extract`, `fetch`, `enrich`, `media`, `describe` |
 | `state.json` | JSON | Extractor cursors (`last_seen_id`, `last_run`) per source, archive-import marker | `extract`, `import-archive` |
 | `vocab.yaml` | YAML list of `Topic` | The controlled topic taxonomy — closed list of slugs + descriptions | `vocab` |
 | `topics.json` | JSON dict of `TopicPage` | The synthesized topic-page overviews and notes, keyed by slug | `topics` |
@@ -355,6 +384,7 @@ The LLM-driven stages (`vocab`, `enrich`, `topics`) do not have their instructio
 | `rubric-topics.md` | `enrich` | Assign one `primary_topic` + 0-3 secondaries from the closed vocab. Never invent slugs |
 | `rubric-summary.md` | `enrich` | Write a 1-3 sentence summary, faithful to the post and the fetched article, no hallucination |
 | `rubric-topic-page.md` | `topics` | Synthesize 1-3 paragraphs of plain prose + up to 15 short notes per topic, zero wikilinks |
+| `rubric-describe-image.md` | `describe` | Classify each photo as decorative vs content-bearing and describe content-bearing ones in 1-3 sentences. Refusals fall through as decorative with empty description |
 
 **Why a separate file per rubric.** Changing how XBrain summarizes posts is editing one markdown file, not chasing a string through the codebase. The rubric is the *contract* between code and LLM; the code only handles structure, transport and validation.
 
@@ -438,7 +468,7 @@ These are the rules the rest of the architecture rests on. Breaking any of them
 7. **Operation names, not query ids.** The extractor anchors to X GraphQL operation names because X rotates the ids. Anything that hardcodes an id will break.
 8. **Destructive ops are reversible.** Every command that overwrites a `data/` artifact (`vocab --regenerate`, `topics --resynth`, `fetch --force`) snapshots `data/` first to `data/snapshots/<ts>-pre-<command>/`. `xbrain snapshot restore <name>` is the recovery path. A snapshot failure aborts the destructive op.
 9. **Fetch records are tagged unions.** A `ContentSource` on `items.json` is either a `Success` (with required `text`) or a `Failure` (with required `failure_reason`). Mixed shapes are not representable — pydantic rejects them at construction, and mypy rejects them statically (via the `pydantic.mypy` plugin). Legacy records with `ok: bool` (pre-#20) are normalised on read by a `BeforeValidator` on the union, so existing `data/items.json` files keep working without a manual migration. The static contract is pinned by `tests/type_probes/illegal_states.py`.
-10. **Media variants are mutually exclusive states.** A `MediaEntry` on `items.json` is one of `MediaPhotoPending` / `MediaPhotoDownloaded` / `MediaPhotoFailed` / `MediaVideoPending`, discriminated by `kind`. State transitions happen only via `xbrain media`. Legacy records with the flat `{type, url}` shape are normalised on read by a `BeforeValidator` on the union — no manual migration needed. (See the `### media` section above for the retry contract and storage layout.)
+10. **Media variants are mutually exclusive states.** A `MediaEntry` on `items.json` is one of `MediaPhotoPending` / `MediaPhotoDownloaded` / `MediaPhotoFailed` / `MediaPhotoDescribed` / `MediaVideoPending`, discriminated by `kind`. The photo states form a linear pipeline: `Pending → Downloaded → Described` (with `Failed` as the off-ramp from `Pending`). State transitions happen only via `xbrain media` (advances `Pending` and retries `Failed`) and `xbrain describe` (advances `Downloaded` to `Described`). Legacy records with the flat `{type, url}` shape are normalised on read by a `BeforeValidator` on the union — no manual migration needed. (See the `### media` and `### describe` sections above for the per-stage contracts.)
 
 ---
 
diff --git a/README.md b/README.md
index 817b922..aedaf0d 100644
--- a/README.md
+++ b/README.md
@@ -397,6 +397,8 @@ topic_style = "wikilink"                  # wikilink | hashtag (in-body Topics:
 | `[topics]` | `resynth_threshold` | `25` | Post growth that marks a topic overview stale. |
 | `[output]` | `language` | `English` | Output language for LLM summaries/overviews AND wiki section headers. `English` or `Spanish`. |
 | `[output]` | `topic_style` | `wikilink` | How the in-body `**Topics:**` line is rendered: `wikilink` (`[[slug]] · [[slug]]`) or `hashtag` (`#slug #slug`). Frontmatter `tags:` are unaffected. |
+| `[describe]` | `model` | `claude-sonnet-4-6` | Vision model for `xbrain describe`. Override per run with `--model`. |
+| `[describe]` | `version` | `v1` | Tag persisted on every described photo. Bumping invalidates existing descriptions so the next `xbrain describe` re-describes stale entries. |
 
 Switching `[output].language` after the corpus is already enriched is supported
 — but does not retroactively translate existing summaries. To convert the
@@ -517,6 +519,7 @@ uv run xbrain <command> [options]
 | `import-archive <zip>` | Backfill the full own-tweet history from the official X data archive. |
 | `fetch` | Download linked article content, expand threads, fetch linked X content. By default, items whose only previous failures were transient (`timeout`, `dns_error`) are re-fetched automatically; terminal failures (`not_found`, `paywall`, `forbidden`, `js_required`, `empty_content`) stay skipped until `--force`. `--force` re-fetches every external_article source regardless of state. |
 | `media` | Download X-post photos referenced in `Item.media` and render them inline in the wiki. `--force`, `--limit N`, `--items <a,b,c>`, `--verbose`. See [Local media storage](#local-media-storage). |
+| `describe` | Describe downloaded photos with a vision LLM (Claude Sonnet 4.6 by default) and feed the prose into `enrich` + `topics`. `--force`, `--limit N`, `--items <a,b,c>`, `--model`, `--batch-size`, `--verbose`. Idempotent — re-runs skip already-described photos unless `[describe].version` is bumped in `config.toml`. |
 | `vocab` | Induce the topic taxonomy. `--executor`, `--apply <file>`, `--regenerate`. |
 | `enrich` | Enrich items with a summary + topics. `--executor`, `--apply <file>`. |
 | `topics` | Synthesise topic pages. `--executor`, `--apply <file>`, `--resynth`. |
@@ -583,7 +586,31 @@ Failures are categorised on the item itself
   the next `xbrain media` run.
 
 Run `xbrain diff <snapshot>` after a media run to see how many photos
-moved from `pending` / `failed` into `downloaded`.
+moved from `pending` / `failed` into `downloaded` (or, after `xbrain
+describe`, into `described`).
+
+**Vision descriptions**
+
+Once `xbrain media` has the bytes on disk, `xbrain describe` runs every
+photo through Claude vision and stores a short prose description on
+the entry (transitioning `MediaPhotoDownloaded` → `MediaPhotoDescribed`).
+Descriptions are 1-3 sentences, faithful, in the configured
+`output_language`. Decorative photos (avatars, reaction memes,
+abstract backgrounds) are classified as such and persisted with an
+empty description so they introduce no topic noise downstream.
+
+`xbrain enrich` and `xbrain topics` consume the descriptions
+automatically: an item with content-bearing photos gets an
+`Images in this post:` block in the enrichment prompt; topic-page
+synthesis sees the flat list of content image descriptions across the
+topic's posts. This is how a tweet that is mostly a screenshot of a
+paper becomes searchable by what the screenshot was actually about.
+
+Describing the full corpus costs about $3-5 with the default model
+(Sonnet 4.6, 5 images per call). Bump `[describe].version` in
+`config.toml` to invalidate stored descriptions when you change the
+rubric — the next `xbrain describe` run re-describes stale entries
+automatically without `--force`.
 
 ---
 
diff --git a/src/xbrain/executors/api.py b/src/xbrain/executors/api.py
index 4f9c26e..1172aab 100644
--- a/src/xbrain/executors/api.py
+++ b/src/xbrain/executors/api.py
@@ -72,9 +72,7 @@ def _content_image_descriptions(item: Item) -> list[str]:
     return [
         entry.description
         for entry in item.media
-        if isinstance(entry, MediaPhotoDescribed)
-        and not entry.is_decorative
-        and entry.description
+        if isinstance(entry, MediaPhotoDescribed) and not entry.is_decorative and entry.description
     ]
 
 
@@ -100,8 +98,7 @@ def _links_section(item: Item) -> list[str]:
         return []
     lines = [
         "",
-        "Links in the post (the domain is topic signal even when "
-        "the article body is unavailable):",
+        "Links in the post (the domain is topic signal even when the article body is unavailable):",
     ]
     lines += [f"- {ln.url}  (domain: {ln.domain})" for ln in item.links]
     return lines
diff --git a/src/xbrain/topics.py b/src/xbrain/topics.py
index 0efa49f..aea284b 100644
--- a/src/xbrain/topics.py
+++ b/src/xbrain/topics.py
@@ -174,9 +174,7 @@ def topics_needing_synth(
 
 def _collect_summaries(item_pool: list[Item]) -> list[str]:
     """Flatten the non-empty `enriched.summary` strings across a pool of items."""
-    return [
-        item.enriched.summary for item in item_pool if item.enriched and item.enriched.summary
-    ]
+    return [item.enriched.summary for item in item_pool if item.enriched and item.enriched.summary]
 
 
 def _collect_image_descriptions(item_pool: list[Item]) -> list[str]:
@@ -190,9 +188,7 @@ def _collect_image_descriptions(item_pool: list[Item]) -> list[str]:
         entry.description
         for item in item_pool
         for entry in item.media
-        if isinstance(entry, MediaPhotoDescribed)
-        and not entry.is_decorative
-        and entry.description
+        if isinstance(entry, MediaPhotoDescribed) and not entry.is_decorative and entry.description
     ]
 
 

From a215fb68921d785d82c05c03bfb8eacd70f6a5bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 20:22:28 +0200
Subject: [PATCH 08/11] [#34] dedupe SUMMARY + harden MediaPhotoDescribed
 invariants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Drop `_print_partial_failure_summary` from `describe_all`: the CLI's
  `describe_emit_summary_line` is the single source of truth (mirrors
  `media`). Eliminates the double `SUMMARY:` emission on partial-failure
  runs (python-code-reviewer BLOCKER).
- `MediaPhotoDescribed` now carries:
  - `description_lang: SupportedLanguage` (typed alias over
    `i18n.SUPPORTED_LANGUAGES`) — rejects unknown languages at the
    type boundary (type-design-analyzer).
  - Model validator enforcing `is_decorative => description == ""`
    so a hand-constructed variant cannot violate the invariant
    (type-design-analyzer).
  - `described_at` UTC-aware enforcement via field_validator.
- Dedupe the `local_path` traversal guard via a shared free function
  (`_reject_local_path_traversal`); same for the UTC-aware check
  (`_require_utc_aware`). Closes the validator duplication called out
  by python-code-reviewer + code-simplifier — done via free functions
  rather than class inheritance because Liskov substitution would
  silently re-match `MediaPhotoDescribed` against 25+
  `isinstance(entry, MediaPhotoDownloaded)` call sites.
- Add the same UTC-aware contract to `MediaPhotoDownloaded.downloaded_at`
  and `MediaPhotoFailed.last_attempt_at` (closes the pre-existing gap
  for the variants this PR touches).
- Tests: orchestrator no longer emits SUMMARY on partial failure
  (assert `err.count("SUMMARY:") == 0` — pinning the dedup). Test
  helpers for decorative described-photo construction honour the new
  empty-description invariant.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/xbrain/describe.py      |  31 ++----
 src/xbrain/models.py        | 182 ++++++++++++++++++++++++++----------
 tests/test_describe.py      |  16 +++-
 tests/test_executors_api.py |  10 +-
 tests/test_topics.py        |  10 +-
 5 files changed, 169 insertions(+), 80 deletions(-)

diff --git a/src/xbrain/describe.py b/src/xbrain/describe.py
index 61f514e..80a2cb8 100644
--- a/src/xbrain/describe.py
+++ b/src/xbrain/describe.py
@@ -10,15 +10,16 @@
 
 The structure mirrors `xbrain.executors.api` and `xbrain.topic_synth`:
 a recoverable-errors tuple, per-batch failure isolation,
-`logger.warning` on every failure, `RuntimeError` on total failure, and
-a `SUMMARY: described: N, failed: M, skipped: K` stderr line for the
-CLI. Programmer bugs (`AttributeError`) and `KeyboardInterrupt`
-propagate — narrow `except` clauses only.
+`logger.warning` on every failure, and `RuntimeError` on total failure.
+Programmer bugs (`AttributeError`) and `KeyboardInterrupt` propagate
+— narrow `except` clauses only. The `SUMMARY: described: N, failed: M,
+skipped: K` stderr line is emitted exclusively by the CLI via
+`emit_summary_line` (single source of truth, same shape as `media`).
 
 I/O dependencies (the Anthropic client, the media root path) are
 keyword-injectable so tests run offline. The store mutation is in
-place; the caller is expected to wrap each transition with a
-store-write (the `on_progress` callback fires after every batch).
+place; the caller wraps each transition with a store-write
+(the `on_progress` callback fires after every batch).
 """
 
 from __future__ import annotations
@@ -588,7 +589,6 @@ def describe_all(
 
     report.elapsed_seconds = time.monotonic() - started
     _raise_on_total_failure(report)
-    _print_partial_failure_summary(report)
     return report
 
 
@@ -653,23 +653,6 @@ def _raise_on_total_failure(report: DescribeReport) -> None:
         )
 
 
-def _print_partial_failure_summary(report: DescribeReport) -> None:
-    """Print the SUMMARY line on partial failure — mirrors `executors.api`.
-
-    Stays silent on a clean run (no failures = no noise) and on a
-    total-failure run (the raise above is the signal). The line shape
-    matches `xbrain.media.emit_summary_line` so log parsers can rely
-    on a single `SUMMARY:` prefix across all stages.
-    """
-    if report.photos_failed > 0:
-        print(
-            f"SUMMARY: described: {report.photos_described}, "
-            f"failed: {report.photos_failed}, "
-            f"skipped: {report.photos_skipped_already_described}",
-            file=sys.stderr,
-        )
-
-
 def _build_user_blocks(batch: list[_Candidate], media_root: Path) -> list[dict]:
     """Build the user-turn content blocks for one batch: images + directive.
 
diff --git a/src/xbrain/models.py b/src/xbrain/models.py
index b018b1e..39f1cad 100644
--- a/src/xbrain/models.py
+++ b/src/xbrain/models.py
@@ -6,7 +6,21 @@
 from datetime import datetime
 from typing import Annotated, Any, Literal, Union
 
-from pydantic import BaseModel, BeforeValidator, Field, TypeAdapter, field_validator
+from pydantic import (
+    BaseModel,
+    BeforeValidator,
+    Field,
+    TypeAdapter,
+    field_validator,
+    model_validator,
+)
+
+from xbrain.i18n import SUPPORTED_LANGUAGES
+
+# Type alias mirroring `i18n.SUPPORTED_LANGUAGES` so the data layer rejects
+# unknown languages at construction time. `Literal[*tuple]` unpacking is
+# Python 3.11+ — we are on 3.12 — and keeps the source of truth in `i18n`.
+SupportedLanguage = Literal[*SUPPORTED_LANGUAGES]  # type: ignore[valid-type]
 
 logger = logging.getLogger(__name__)
 
@@ -93,6 +107,47 @@ class MediaPhotoPending(_MediaPhotoBase):
     kind: Literal["photo_pending"] = "photo_pending"
 
 
+def _reject_local_path_traversal(value: str) -> str:
+    """Reject absolute paths and `..` components on a `local_path`.
+
+    Shared by `MediaPhotoDownloaded` and `MediaPhotoDescribed` so the
+    on-disk path contract has one source of truth. `local_path` is
+    joined onto `data/media/` at render and download time. A persisted
+    record like ``"/etc/passwd"`` or ``"../../x"`` would let a poisoned
+    `items.json` exfiltrate bytes outside the media root. The downloader
+    never builds such a path, but the persisted store is on-disk plain
+    JSON the user can edit — defence in depth at the type boundary is
+    cheap.
+
+    The two variants share the validator via this free function rather
+    than via class inheritance: the rest of the codebase relies on
+    `isinstance(entry, MediaPhotoDownloaded)` meaning "exactly the
+    Downloaded state" in many call sites (the photo states form a
+    tagged union, not a Liskov hierarchy).
+    """
+    if value.startswith("/") or value.startswith("\\"):
+        raise ValueError(f"local_path must be relative, got {value!r}")
+    # Normalise separators before scanning components so a Windows-style
+    # path persisted on a foreign machine is still caught.
+    for part in value.replace("\\", "/").split("/"):
+        if part == "..":
+            raise ValueError(f"local_path must not contain '..' components: {value!r}")
+    return value
+
+
+def _require_utc_aware(field_name: str, value: datetime) -> datetime:
+    """Reject naive timestamps — every persisted instant must be UTC-aware.
+
+    Shared by every photo-variant timestamp (`downloaded_at`,
+    `described_at`, `last_attempt_at`) so a hand-edited naive record on
+    a foreign machine does not silently miscompare against
+    `now(timezone.utc)`. We do not coerce — that would mask the bug.
+    """
+    if value.tzinfo is None:
+        raise ValueError(f"{field_name} must be timezone-aware, got naive {value!r}")
+    return value
+
+
 class MediaPhotoDownloaded(_MediaPhotoBase):
     """A photo successfully downloaded; the bytes exist at `local_path`.
 
@@ -106,6 +161,9 @@ class MediaPhotoDownloaded(_MediaPhotoBase):
     "downloaded" photo is semantically illegal — Pillow validation in
     `xbrain.media._decode_image` rules out the dim=0 case at the seam,
     and the type constraint pins it at the data layer too.
+
+    `downloaded_at` MUST be timezone-aware (UTC) so a hand-edited naive
+    timestamp does not silently miscompare against `now(timezone.utc)`.
     """
 
     kind: Literal["photo_downloaded"] = "photo_downloaded"
@@ -117,25 +175,15 @@ class MediaPhotoDownloaded(_MediaPhotoBase):
 
     @field_validator("local_path")
     @classmethod
-    def _reject_path_traversal(cls, value: str) -> str:
-        """Reject absolute paths and `..` components.
-
-        `local_path` is joined onto `data/media/` at render and download
-        time. A persisted record like ``"/etc/passwd"`` or ``"../../x"``
-        would let a poisoned `items.json` exfiltrate bytes outside the
-        media root. The downloader code never builds such a path, but
-        the persisted store is on-disk plain JSON the user can edit —
-        defence in depth at the type boundary is cheap.
-        """
+    def _validate_local_path(cls, value: str) -> str:
         _ = cls  # required by @field_validator+@classmethod; placate vulture
-        if value.startswith("/") or value.startswith("\\"):
-            raise ValueError(f"local_path must be relative, got {value!r}")
-        # Normalise separators before scanning components so a Windows-style
-        # path persisted on a foreign machine is still caught.
-        for part in value.replace("\\", "/").split("/"):
-            if part == "..":
-                raise ValueError(f"local_path must not contain '..' components: {value!r}")
-        return value
+        return _reject_local_path_traversal(value)
+
+    @field_validator("downloaded_at")
+    @classmethod
+    def _validate_downloaded_at(cls, value: datetime) -> datetime:
+        _ = cls
+        return _require_utc_aware("downloaded_at", value)
 
 
 class MediaPhotoFailed(_MediaPhotoBase):
@@ -147,6 +195,9 @@ class MediaPhotoFailed(_MediaPhotoBase):
     each transient retry bumps it. `attempts` is `ge=1`: a "failed but
     never attempted" record is semantically nonsense, and the downloader
     increments before producing any `MediaPhotoFailed`.
+
+    `last_attempt_at` MUST be timezone-aware (UTC) — same contract as
+    every other persisted instant on the photo variants.
     """
 
     kind: Literal["photo_failed"] = "photo_failed"
@@ -155,29 +206,49 @@ class MediaPhotoFailed(_MediaPhotoBase):
     attempts: int = Field(ge=1)
     last_attempt_at: datetime
 
+    @field_validator("last_attempt_at")
+    @classmethod
+    def _validate_last_attempt_at(cls, value: datetime) -> datetime:
+        _ = cls
+        return _require_utc_aware("last_attempt_at", value)
+
 
 class MediaPhotoDescribed(_MediaPhotoBase):
     """A downloaded photo that has also been described by a vision LLM.
 
     The terminal state for a content-bearing photo: the bytes still live
-    at `local_path` (every `MediaPhotoDownloaded` invariant carries over)
-    AND a vision pass has classified the image and written a short prose
-    description. Decorative photos (avatars, reaction memes, abstract
-    backgrounds) reach this variant too — they carry `is_decorative=True`
-    and an empty `description`, so downstream callers can filter them out
+    at `local_path` (every `MediaPhotoDownloaded` invariant carries over
+    verbatim — `local_path` + dimensions + `bytes_size` + `downloaded_at`
+    are re-declared so the field shape is wire-identical) AND a vision
+    pass has classified the image and written a short prose description.
+    Decorative photos (avatars, reaction memes, abstract backgrounds)
+    reach this variant too — they carry `is_decorative=True` and an
+    empty `description`, so downstream callers can filter them out
     without re-classifying.
 
+    Why NOT inherit from `MediaPhotoDownloaded`? The photo states form a
+    pydantic tagged union, not a Liskov class hierarchy. 25+ call sites
+    use `isinstance(entry, MediaPhotoDownloaded)` to mean "exactly the
+    Downloaded state" (eligibility checks, diff counters, generator
+    routing). Inheritance would silently re-match every Described entry
+    against those checks. We dedupe the field-traversal validator and
+    the UTC-aware validator via free functions instead.
+
     `description_version` is the rubric/version tag the description was
     produced under. Bumping the configured version invalidates existing
     entries: the next `xbrain describe` run treats them as stale and
-    re-describes (no `--force` needed). `description_lang` records the
-    language the prose was written in so a vault that switches
-    `output_language` mid-corpus can be re-described selectively.
-
-    `description` is empty for decorative photos by contract — the
-    vision rubric returns an empty string in that case and a refusal
-    (faces, NSFW) is also bucketed as decorative with empty description.
-    No `gt=0` constraint here for that reason.
+    re-describes (no `--force` needed). `description_lang` is typed as
+    `SupportedLanguage` so an unknown language is rejected at
+    construction — the type alias is derived from `i18n.SUPPORTED_LANGUAGES`
+    (single source of truth).
+
+    `description` is empty for decorative photos by contract — enforced
+    by a model-level validator below so a hand-constructed variant
+    cannot violate the invariant. The vision rubric returns an empty
+    string in the decorative case and a refusal (faces, NSFW) is also
+    bucketed as decorative with empty description.
+
+    `described_at` and `downloaded_at` MUST be timezone-aware (UTC).
     """
 
     kind: Literal["photo_described"] = "photo_described"
@@ -188,28 +259,43 @@ class MediaPhotoDescribed(_MediaPhotoBase):
     downloaded_at: datetime
     is_decorative: bool
     description: str
-    description_lang: str
+    description_lang: SupportedLanguage
     description_version: str
     described_at: datetime
 
     @field_validator("local_path")
     @classmethod
-    def _reject_path_traversal(cls, value: str) -> str:
-        """Reject absolute paths and `..` components.
-
-        Mirrors `MediaPhotoDownloaded._reject_path_traversal` — the
-        described variant inherits the same on-disk path contract because
-        the bytes from the prior Downloaded state are still referenced
-        verbatim. Persisted `items.json` is on-disk plain JSON the user
-        can edit, so the defence in depth is reapplied here.
-        """
+    def _validate_local_path(cls, value: str) -> str:
         _ = cls
-        if value.startswith("/") or value.startswith("\\"):
-            raise ValueError(f"local_path must be relative, got {value!r}")
-        for part in value.replace("\\", "/").split("/"):
-            if part == "..":
-                raise ValueError(f"local_path must not contain '..' components: {value!r}")
-        return value
+        return _reject_local_path_traversal(value)
+
+    @field_validator("downloaded_at")
+    @classmethod
+    def _validate_downloaded_at(cls, value: datetime) -> datetime:
+        _ = cls
+        return _require_utc_aware("downloaded_at", value)
+
+    @field_validator("described_at")
+    @classmethod
+    def _validate_described_at(cls, value: datetime) -> datetime:
+        _ = cls
+        return _require_utc_aware("described_at", value)
+
+    @model_validator(mode="after")
+    def _decorative_implies_empty_description(self) -> "MediaPhotoDescribed":
+        """A decorative photo MUST have an empty description (rubric contract).
+
+        Enforced at the type boundary so downstream callers can rely on
+        `is_decorative => not description` without re-validating. The
+        producer (`describe._apply_judgment`) already overwrites the
+        description to "" on decorative judgments; this is the
+        defence-in-depth for hand-edited records.
+        """
+        if self.is_decorative and self.description != "":
+            raise ValueError(
+                f"is_decorative=True requires an empty description, got {self.description!r}"
+            )
+        return self
 
 
 class MediaVideoPending(BaseModel):
diff --git a/tests/test_describe.py b/tests/test_describe.py
index a3a8ff0..5072b95 100644
--- a/tests/test_describe.py
+++ b/tests/test_describe.py
@@ -516,8 +516,10 @@ def test_describe_all_isolates_a_batch_error(tmp_path: Path, capsys):
     assert report.photos_described == 1
     assert report.photos_failed == 1
     assert report.batches_failed == 1
+    # The orchestrator does not emit SUMMARY — the CLI does. Verify the
+    # orchestrator stays silent so the CLI is the single source of truth.
     err = capsys.readouterr().err
-    assert "SUMMARY: described: 1, failed: 1" in err
+    assert "SUMMARY:" not in err
 
 
 def test_describe_all_raises_when_every_batch_fails(tmp_path: Path):
@@ -719,8 +721,14 @@ def test_describe_all_handles_missing_file_as_per_batch_failure(tmp_path: Path):
     assert report.photos_failed == 1
 
 
-def test_describe_all_emits_summary_line_on_partial_failure(tmp_path: Path, capsys):
-    """Partial-failure runs emit a SUMMARY line; same convention as `media`."""
+def test_describe_all_does_not_emit_summary_on_partial_failure(tmp_path: Path, capsys):
+    """The orchestrator never emits SUMMARY — the CLI is the single source of truth.
+
+    Mirrors `media.download_all`: SUMMARY lives on `emit_summary_line`,
+    not on the orchestrator. Asserts the count is exactly zero on a
+    partial-failure run so a future regression that re-introduces a
+    second emitter (double-SUMMARY) fails this test.
+    """
     from anthropic import APIError
 
     media_root = tmp_path / "media"
@@ -745,7 +753,7 @@ def test_describe_all_emits_summary_line_on_partial_failure(tmp_path: Path, caps
         client=client,
     )
     err = capsys.readouterr().err
-    assert "SUMMARY: described: 1, failed: 1" in err
+    assert err.count("SUMMARY:") == 0
 
 
 def test_describe_all_emits_no_summary_when_all_succeed(tmp_path: Path, capsys):
diff --git a/tests/test_executors_api.py b/tests/test_executors_api.py
index ca3dd47..7568e80 100644
--- a/tests/test_executors_api.py
+++ b/tests/test_executors_api.py
@@ -197,7 +197,13 @@ def test_api_executor_emits_no_summary_when_all_succeed(capsys):
 
 
 def _described_photo(*, description: str, decorative: bool = False):
-    """Build a `MediaPhotoDescribed` for prompt-integration tests."""
+    """Build a `MediaPhotoDescribed` for prompt-integration tests.
+
+    `MediaPhotoDescribed` enforces `is_decorative => description == ""`
+    at the model layer; this helper honours that contract by forcing an
+    empty description when `decorative=True`, so the caller's `description`
+    argument is silently dropped in the decorative branch.
+    """
     from datetime import datetime, timezone
 
     from xbrain.models import MediaPhotoDescribed
@@ -210,7 +216,7 @@ def _described_photo(*, description: str, decorative: bool = False):
         bytes_size=512,
         downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
         is_decorative=decorative,
-        description=description,
+        description="" if decorative else description,
         description_lang="English",
         description_version="v1",
         described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),
diff --git a/tests/test_topics.py b/tests/test_topics.py
index 75db3a3..52405c1 100644
--- a/tests/test_topics.py
+++ b/tests/test_topics.py
@@ -198,7 +198,13 @@ def test_build_topic_inputs_rejects_an_unknown_slug():
 
 
 def _described_photo_in_topic(*, description: str, decorative: bool = False):
-    """Build a `MediaPhotoDescribed` for the topic-inputs filtering test."""
+    """Build a `MediaPhotoDescribed` for the topic-inputs filtering test.
+
+    `MediaPhotoDescribed` enforces `is_decorative => description == ""`
+    at the model layer; this helper honours that contract by forcing an
+    empty description when `decorative=True`, so the caller's `description`
+    argument is silently dropped in the decorative branch.
+    """
     from datetime import datetime, timezone
 
     from xbrain.models import MediaPhotoDescribed
@@ -211,7 +217,7 @@ def _described_photo_in_topic(*, description: str, decorative: bool = False):
         bytes_size=512,
         downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
         is_decorative=decorative,
-        description=description,
+        description="" if decorative else description,
         description_lang="English",
         description_version="v1",
         described_at=datetime(2026, 5, 24, 12, tzinfo=timezone.utc),

From 260c883a98e9c56a3c9fdec3762bff78c673eb13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 20:32:31 +0200
Subject: [PATCH 09/11] [#34] refactor orchestrator + SDK refusal + language
 drift + Protocol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convergent fixes from the round-1 reviewer pipeline.

Category 2.1 mixed-language drift (type-design-analyzer + silent-failure-hunter):
- `_is_stale` now also returns True when `description_lang` drifted vs
  the configured output language. Switching `output_language` between
  Spanish and English no longer leaves stale-language prose persisted —
  the next run re-describes those entries.
- `output_language` parameter is typed `SupportedLanguage`, matching
  the new `MediaPhotoDescribed.description_lang` field type. The
  orchestrator now propagates a typed language end-to-end.

Category 3 over-decomposed orchestrator (code-simplifier):
- Collapsed the four-helper candidate chain (`_tally_skipped` +
  `_iter_item_candidates` + `_take_with_limit` + outer
  `_iter_candidates` + inner `_all_eligible` closure) into a single
  `_iter_eligible_candidates` generator, mirroring `media._iter_eligible_attempts`.
  A small `_tally_idempotency_skip` helper keeps the loop body
  under radon grade C; `_filtered_items` keeps the outer scan flat.
- Removed `_take_with_limit`, `_chunked`, `_messages_create`, and
  `_user_directive` as standalone helpers. `itertools.batched(...)`
  replaces `_chunked`; the SDK call is inlined; the directive is a
  small dict literal builder local to the block building.

Category 1.3 dead defence-in-depth (python-code-reviewer):
- Dropped the unreachable duplicate-judgment-index guard in
  `_apply_batch_judgments`. The parser already rejects duplicate
  indices, making the dict-construction guard dead code.

Category 4 silent-failure regressions (silent-failure-hunter):
- 4.1: SDK `stop_reason="refusal"` now transitions every photo in the
  batch to `MediaPhotoDescribed(is_decorative=True, description="")`.
  Satisfies the rubric contract at the SDK level so refused batches
  make progress instead of churning.
- 4.2: `JSONDecodeError` now logs `response.stop_reason` so a
  max-tokens truncation is diagnosable from the warning line alone.
- 4.3: Unknown extension in `_media_type` now emits
  `logger.warning` before falling back to `image/jpeg`.
- 4.4: A batch where the model omitted some judgments now bumps
  `batches_failed += 1` — the batch did not return complete data, so
  the operator's "did this API call do its job" view is honest.

Category 5.3 type design (type-design-analyzer):
- Defined `MessagesClient` + `VisionClient` Protocols local to
  `describe.py`. Dropped the `# type: ignore[attr-defined]` on
  `client.messages.create(...)`.

Category 6 tests (pr-test-analyzer):
- 6.1: New full-payload kwarg shape assertion (model, max_tokens,
  system, role, image+text block ordering, base64 source layout).
- 6.2: `test_describe_all_handles_refusal_as_decorative_empty` plus a
  dedicated `_FakeRefusalResponse` fake.
- 6.3: `test_parse_batch_response_strips_fence_without_language_tag`.
- 6.4: `--verbose` CLI test exercising the per-failure listing.
- 6.5: New mixed-language eligibility test
  (`test_eligible_described_stale_language_is_eligible`) and CLI
  partial-failure SUMMARY count test pinning to exactly one.
- Plus refactored `_FakeMessagesList` so pre-built response objects
  (refusal, truncation) bypass the JSON-list wrapper.

Category 7 comments + docs:
- 7.1: Fixed misleading `_MEDIA_TYPES` comment (was "three", actual
  cardinality is 4 keys mapping to 3 distinct media types).
- 7.2: Rewrote `_run_describe` docstring — coherence is held by the
  outer `try/finally`, not by `on_progress`.
- 7.3: Trimmed the `describe.py` module docstring to 2 paragraphs
  matching the project voice (was 4 paragraphs, 22 lines).
- 7.4: Reworded ARCHITECTURE.md `### describe` ("a new variant on
  the `MediaEntry` union"). Documented the language-staleness
  contract in the state-machine doc.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ARCHITECTURE.md        |   8 +-
 src/xbrain/cli.py      |  12 +-
 src/xbrain/describe.py | 521 ++++++++++++++++++++++++-----------------
 tests/test_cli.py      | 201 ++++++++++++++++
 tests/test_describe.py | 257 +++++++++++++++++++-
 5 files changed, 760 insertions(+), 239 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 4bdbc15..1fb2340 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -268,7 +268,7 @@ The numbered stages above are summarised; the sections below cover each one in d
 
 ### describe
 
-**What it does.** Sends every downloaded photo to a Claude vision model, asks for a 1-3 sentence prose description plus a `is_decorative` classification, and persists the prose on the entry. The entry transitions from `MediaPhotoDownloaded` to `MediaPhotoDescribed` (a fifth variant on the `MediaEntry` union, on top of the four `media` already exposes). Decorative photos (avatars, reaction memes, abstract backgrounds) are classified as such with an empty description so downstream prompts can filter them out without re-classifying.
+**What it does.** Sends every downloaded photo to a Claude vision model, asks for a 1-3 sentence prose description plus a `is_decorative` classification, and persists the prose on the entry. The entry transitions from `MediaPhotoDownloaded` to `MediaPhotoDescribed` (a new variant on the `MediaEntry` union). Decorative photos (avatars, reaction memes, abstract backgrounds) are classified as such with an empty description so downstream prompts can filter them out without re-classifying.
 
 **Reads.** `data/items.json` + `data/media/<id>/<n>.<ext>` (the bytes the downloader wrote).
 
@@ -276,10 +276,10 @@ The numbered stages above are summarised; the sections below cover each one in d
 
 **State machine.** Each `xbrain describe` run advances eligible photo entries:
 - `Downloaded` → `Described` (description on the entry, bytes unchanged).
-- `Described` (stale version) → `Described` (current version), automatically.
-- `Described` (current version) → no-op (skipped) unless `--force`.
+- `Described` (stale version OR stale language) → `Described` (current version + current language), automatically.
+- `Described` (current version + current language) → no-op (skipped) unless `--force`.
 
-Eligibility ignores `Pending` / `Failed` / `VideoPending`: describe only runs on photos with bytes on disk. The description-version tag is the rubric-evolution lever: bumping `[describe].version` in `config.toml` invalidates persisted entries so the next run re-describes them without `--force`.
+Eligibility ignores `Pending` / `Failed` / `VideoPending`: describe only runs on photos with bytes on disk. The description-version tag is the rubric-evolution lever: bumping `[describe].version` in `config.toml` invalidates persisted entries so the next run re-describes them without `--force`. The `description_lang` check is the mixed-vault guard: switching `[paths].output_language` from Spanish → English (or back) marks every previously-described entry stale so the enrich prompt never splices the wrong-language prose into a new vault.
 
 **Batching.** Default batch size is 5 images per API call (the spec's quality / cost sweet spot — ~12-15 % token saving vs per-image, modest added complexity). Override with `--batch-size N`.
 
diff --git a/src/xbrain/cli.py b/src/xbrain/cli.py
index 3d9e3be..7a26062 100644
--- a/src/xbrain/cli.py
+++ b/src/xbrain/cli.py
@@ -375,12 +375,12 @@ def _run_describe(
 
     Always snapshots `data/` first (the same recovery boundary as
     `xbrain media`): a botched run — a wrong model, a runaway prompt
-    — can be undone with `xbrain snapshot restore`. Persistence
-    happens twice for the same reason as `_run_media`: once after
-    every batch (the `on_progress` callback fires between batches so
-    Ctrl-C mid-run leaves `items.json` coherent), and once
-    unconditionally at the end so the elapsed-described timestamps
-    are captured even if the orchestrator raised on total failure.
+    — can be undone with `xbrain snapshot restore`. Coherence on a
+    Ctrl-C mid-run is held by the outer `try/finally` below, which
+    saves the store unconditionally even when the orchestrator raises;
+    the `on_progress` callback is for incremental persistence between
+    batches on a clean run (so a long describe run never loses more
+    than one batch of work to a process death).
     """
     if items_filter:
         target = set(items_filter)
diff --git a/src/xbrain/describe.py b/src/xbrain/describe.py
index 80a2cb8..fd7d675 100644
--- a/src/xbrain/describe.py
+++ b/src/xbrain/describe.py
@@ -3,29 +3,23 @@
 The `describe_all` orchestrator walks every photo entry the downloader
 has produced, batches the bytes into vision-API calls (default: 5 images
 per call to Claude Sonnet), parses the per-image JSON judgments, and
-transitions matched entries to `MediaPhotoDescribed`. The persisted
-description is consumed by the enrich-time prompt in `executors.api`
-and the topic-synth prompt in `topic_synth` — decorative photos are
-filtered out at that consumption seam so they introduce no topic noise.
-
-The structure mirrors `xbrain.executors.api` and `xbrain.topic_synth`:
-a recoverable-errors tuple, per-batch failure isolation,
-`logger.warning` on every failure, and `RuntimeError` on total failure.
-Programmer bugs (`AttributeError`) and `KeyboardInterrupt` propagate
-— narrow `except` clauses only. The `SUMMARY: described: N, failed: M,
+transitions matched entries to `MediaPhotoDescribed`. Decorative photos
+(avatars, reaction memes) are filtered at the downstream consumption
+seam so they introduce no topic noise.
+
+The structure mirrors `xbrain.executors.api` and `xbrain.media`:
+recoverable-errors tuple, per-batch failure isolation, `logger.warning`
+on every failure, `RuntimeError` on total failure. Programmer bugs and
+`KeyboardInterrupt` propagate. The `SUMMARY: described: N, failed: M,
 skipped: K` stderr line is emitted exclusively by the CLI via
-`emit_summary_line` (single source of truth, same shape as `media`).
-
-I/O dependencies (the Anthropic client, the media root path) are
-keyword-injectable so tests run offline. The store mutation is in
-place; the caller wraps each transition with a store-write
-(the `on_progress` callback fires after every batch).
+`emit_summary_line` (single source of truth).
 """
 
 from __future__ import annotations
 
 import base64
 import io
+import itertools
 import json
 import logging
 import sys
@@ -34,11 +28,13 @@
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Protocol
 
 from xbrain.models import (
     Item,
     MediaPhotoDescribed,
     MediaPhotoDownloaded,
+    SupportedLanguage,
 )
 from xbrain.rubrics import load_rubric
 
@@ -56,7 +52,10 @@
 _MAX_TOKENS = 1200
 
 # Map file extensions to the Anthropic vision media-type strings. The
-# downloader writes one of these three (see `xbrain.media._FORMAT_EXTENSIONS`).
+# downloader writes one of these four (`.jpg` is mapped twice — once for
+# `.jpg`, once for `.jpeg` — so the cardinality of distinct media types
+# is three: image/jpeg, image/png, image/webp). See
+# `xbrain.media._FORMAT_EXTENSIONS` for the producer side.
 _MEDIA_TYPES: dict[str, str] = {
     ".jpg": "image/jpeg",
     ".jpeg": "image/jpeg",
@@ -65,6 +64,32 @@
 }
 
 
+class MessagesClient(Protocol):
+    """Minimal structural type for the Anthropic SDK `client.messages` seam.
+
+    Defined locally so the orchestrator does not need to type-ignore an
+    untyped `client.messages.create(...)` call. The real
+    `anthropic.Anthropic().messages` satisfies this Protocol; the test
+    fakes (`tests.conftest.FakeAnthropic`, `_FakeVisionClient`) do too.
+    """
+
+    def create(self, **kwargs: object) -> object:
+        """Send one Anthropic `messages.create` call; return the raw response."""
+        ...
+
+
+class VisionClient(Protocol):
+    """Minimal structural type for the Anthropic SDK client itself.
+
+    Drops the `# type: ignore[attr-defined]` that would otherwise be
+    needed on `client.messages.create(...)`. The protocol is local to
+    this module because describe is the only consumer; `executors.api`
+    still uses a duck-typed client for now (out of scope for this PR).
+    """
+
+    messages: MessagesClient
+
+
 def _utcnow() -> datetime:
     """Default `now` clock — UTC-aware `datetime.now()`.
 
@@ -83,9 +108,10 @@ class DescribeReport:
     no-op re-run with no version bump must report every previously
     described photo here. `batches_attempted` counts API calls actually
     issued; `batches_failed` counts the ones the recoverable-errors
-    tuple swallowed. A run with `photos_attempted > 0 and
-    photos_described == 0` raises before this report leaves the
-    orchestrator.
+    tuple swallowed PLUS batches where the SDK refused or the model
+    returned fewer judgments than the batch size. A run with
+    `photos_attempted > 0 and photos_described == 0` raises before
+    this report leaves the orchestrator.
     """
 
     items_processed: int = 0
@@ -107,8 +133,8 @@ class _Candidate:
 
     Holds back-references to `Item` and the media-list `index` so the
     orchestrator can swap the transitioned variant back into place
-    without re-scanning the store. `bytes_data` is loaded lazily by
-    `_load_bytes` — failing to read the file is a per-photo failure,
+    without re-scanning the store. Bytes are loaded lazily by
+    `_load_bytes` — failing to read the file is a per-batch failure,
     not a total-run abort.
     """
 
@@ -142,32 +168,44 @@ def _recoverable_errors() -> tuple[type[Exception], ...]:
         return (ValueError, json.JSONDecodeError, KeyError, OSError)
 
 
-def _is_stale(entry: MediaPhotoDescribed, *, current_version: str) -> bool:
-    """A described entry is stale when its version no longer matches the config.
+def _is_stale(
+    entry: MediaPhotoDescribed,
+    *,
+    current_version: str,
+    current_language: str,
+) -> bool:
+    """A described entry is stale when its version OR language drifted.
+
+    Two triggers, both no-`--force`:
+
+    1. `description_version != current_version` — bumping
+       `[describe].version` in `config.toml` invalidates the corpus
+       against a new rubric.
+    2. `description_lang != current_language` — switching
+       `[paths].output_language` from Spanish to English (or back)
+       leaves stale-language prose in place that would otherwise
+       drift into the enrich prompt as a mixed-language vault.
 
-    Bumping `describe_version` in `config.toml` is the manual trigger to
-    re-describe the whole corpus against a new rubric — no `--force`
-    needed. The version check is exact-string: there is no ordering
-    relation between versions, only equality, so a deliberate downgrade
-    is also a "describe again" signal.
+    Equality is exact-string for both: there is no ordering relation
+    between versions or languages, so a deliberate downgrade also
+    triggers re-describe.
     """
-    return entry.description_version != current_version
+    if entry.description_version != current_version:
+        return True
+    return entry.description_lang != current_language
 
 
-def _eligible(
-    entry: object,
-    *,
-    force: bool,
-    current_version: str,
+def _is_eligible(
+    entry: object, *, force: bool, current_version: str, current_language: str
 ) -> bool:
     """Decide whether `describe_all` should attempt this entry on THIS run.
 
     `MediaPhotoDownloaded` entries are always eligible — they have not
     been described yet by definition. `MediaPhotoDescribed` entries are
-    eligible only when `--force` is set or the persisted version is
-    stale vs the current `describe_version` config. Every other
-    variant (`MediaPhotoPending`, `MediaPhotoFailed`, `MediaVideoPending`)
-    is out of scope — describing only runs over photos whose bytes are
+    eligible only when `--force` is set or the persisted version/language
+    is stale vs the current config. Every other variant
+    (`MediaPhotoPending`, `MediaPhotoFailed`, `MediaVideoPending`) is
+    out of scope — describing only runs over photos whose bytes are
     already on disk.
     """
     if isinstance(entry, MediaPhotoDownloaded):
@@ -175,139 +213,130 @@ def _eligible(
     if isinstance(entry, MediaPhotoDescribed):
         if force:
             return True
-        return _is_stale(entry, current_version=current_version)
+        return _is_stale(
+            entry,
+            current_version=current_version,
+            current_language=current_language,
+        )
     return False
 
 
-def _tally_skipped(
+def _tally_idempotency_skip(
     entry: object,
     *,
     current_version: str,
+    current_language: str,
     report: DescribeReport,
 ) -> None:
-    """Bump `photos_skipped_already_described` when this entry is a no-op skip.
+    """Bump `photos_skipped_already_described` for same-version+same-language describes.
 
-    Pulled out of the candidate iterator so the loop body in
-    `_iter_candidates` keeps a low complexity grade. Only described
-    entries on the current version count as skips — pending/failed/
-    video entries are silently out of scope for `xbrain describe`.
+    Only fresh `MediaPhotoDescribed` entries count as idempotency skips.
+    Pending/failed/video entries are silently out of scope for
+    `xbrain describe`. Pulled out of the candidate iterator so the loop
+    body stays under radon grade C.
     """
     if isinstance(entry, MediaPhotoDescribed) and not _is_stale(
-        entry, current_version=current_version
+        entry,
+        current_version=current_version,
+        current_language=current_language,
     ):
         report.photos_skipped_already_described += 1
 
 
-def _iter_item_candidates(
-    item_id: str,
-    item: Item,
-    *,
-    force: bool,
-    current_version: str,
-    report: DescribeReport,
-) -> Iterator[_Candidate]:
-    """Yield every eligible candidate inside one item's media list.
+def _filtered_items(
+    items: dict[str, Item],
+    items_filter: set[str] | None,
+) -> Iterator[tuple[str, Item]]:
+    """Yield `(item_id, item)` pairs the run should scan, skipping empty media.
 
-    Pulled out of the cross-item iterator so the outer scan stays
-    flat. Per-entry tallies (skips) go on the report; the caller
-    decides whether the item itself counts as processed by checking
-    if this iterator yielded anything.
+    Pulled out so `_iter_eligible_candidates` stays under radon grade C.
+    An `items_filter` of `None` is "every item"; an empty media list
+    is silently skipped (no photos to consider).
     """
-    for index, entry in enumerate(item.media):
-        if not _eligible(entry, force=force, current_version=current_version):
-            _tally_skipped(entry, current_version=current_version, report=report)
+    for item_id, item in items.items():
+        if items_filter is not None and item_id not in items_filter:
             continue
-        assert isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed))
-        yield _Candidate(item_id=item_id, item=item, index=index, entry=entry)
-
-
-def _take_with_limit(candidates: Iterator[_Candidate], limit: int | None) -> Iterator[_Candidate]:
-    """Yield from `candidates`, stopping after `limit` items.
-
-    Pulled out so `_iter_candidates` does not interleave the
-    limit-countdown with the per-item bookkeeping. `None` means "no
-    limit" — yields everything.
-    """
-    if limit is None:
-        yield from candidates
-        return
-    remaining = limit
-    for candidate in candidates:
-        if remaining <= 0:
-            return
-        remaining -= 1
-        yield candidate
+        if not item.media:
+            continue
+        yield item_id, item
 
 
-def _iter_candidates(
+def _iter_eligible_candidates(
     items: dict[str, Item],
     *,
     force: bool,
     limit: int | None,
     items_filter: set[str] | None,
     current_version: str,
+    current_language: str,
     report: DescribeReport,
 ) -> Iterator[_Candidate]:
-    """Yield each candidate eligible for description on THIS run.
-
-    Side effects on `report`: bumps `items_processed` once per item
-    that contributes at least one yielded candidate, and bumps
-    `photos_skipped_already_described` for each `MediaPhotoDescribed`
-    entry passed over (via `_tally_skipped`). Stops yielding once
-    `limit` is exhausted.
-
-    `items_processed` is bumped on the FIRST yielded candidate of an
-    item, not at the end of its scan — that way a `limit` that
-    truncates mid-item still counts the item as processed (work
-    happened on it). Items whose every photo was skipped do NOT count
-    as processed.
-    """
-    seen_item_ids: set[str] = set()
+    """Yield each candidate eligible for description, with all bookkeeping inline.
 
-    def _all_eligible() -> Iterator[_Candidate]:
-        for item_id, item in items.items():
-            if items_filter is not None and item_id not in items_filter:
-                continue
-            if not item.media:
-                continue
-            yield from _iter_item_candidates(
-                item_id,
-                item,
-                force=force,
-                current_version=current_version,
-                report=report,
-            )
-
-    for candidate in _take_with_limit(_all_eligible(), limit):
-        if candidate.item_id not in seen_item_ids:
-            seen_item_ids.add(candidate.item_id)
-            report.items_processed += 1
-        yield candidate
+    Side effects on `report` (mirrors `media._iter_eligible_attempts`):
 
+    - bumps `items_processed` once per item that contributes at least
+      one yielded candidate (first yielded candidate of an item — a
+      `limit` that truncates mid-item still counts the item, items
+      whose every photo was skipped do NOT count);
+    - bumps `photos_skipped_already_described` for each
+      `MediaPhotoDescribed` entry on the current version+language that
+      gets passed over (via `_tally_idempotency_skip`).
 
-def _chunked(candidates: list[_Candidate], size: int) -> Iterator[list[_Candidate]]:
-    """Split a candidate list into batches of at most `size` entries.
-
-    `size` is guaranteed positive by the CLI layer (`Typer` validates
-    integer ranges); this helper does not re-validate. An empty input
-    yields nothing — the orchestrator never issues a no-op API call.
+    Stops yielding once `limit` is exhausted. Replaces the four-helper
+    chain that was here before (`_tally_skipped` + `_iter_item_candidates`
+    + `_take_with_limit` + outer `_iter_candidates`).
     """
-    for start in range(0, len(candidates), size):
-        yield candidates[start : start + size]
+    remaining = limit
+    seen_item_ids: set[str] = set()
+    for item_id, item in _filtered_items(items, items_filter):
+        for index, entry in enumerate(item.media):
+            if remaining is not None and remaining <= 0:
+                return
+            if not _is_eligible(
+                entry,
+                force=force,
+                current_version=current_version,
+                current_language=current_language,
+            ):
+                _tally_idempotency_skip(
+                    entry,
+                    current_version=current_version,
+                    current_language=current_language,
+                    report=report,
+                )
+                continue
+            assert isinstance(entry, (MediaPhotoDownloaded, MediaPhotoDescribed))
+            if item_id not in seen_item_ids:
+                seen_item_ids.add(item_id)
+                report.items_processed += 1
+            if remaining is not None:
+                remaining -= 1
+            yield _Candidate(item_id=item_id, item=item, index=index, entry=entry)
 
 
 def _media_type(local_path: str) -> str:
     """Map an on-disk path's extension to its Anthropic media-type string.
 
-    The downloader writes one of `.jpg` / `.png` / `.webp` (see
-    `xbrain.media._FORMAT_EXTENSIONS`). Anything else came from a hand
-    edit of `items.json` or a future format we have not registered —
-    fall back to `image/jpeg` and let Anthropic reject it if the bytes
-    do not match. The fall-back keeps a per-photo failure out of the
-    total-failure raise path.
+    The downloader writes one of `.jpg` / `.jpeg` / `.png` / `.webp`
+    (see `xbrain.media._FORMAT_EXTENSIONS`). Anything else came from a
+    hand edit of `items.json` or a future format we have not registered
+    — we emit a `logger.warning` (so the operator can see the wrong
+    MIME type was sent) and fall back to `image/jpeg`. Anthropic will
+    reject the request if the bytes do not match; that surfaces as a
+    per-batch failure rather than a silent total-failure raise.
     """
     suffix = Path(local_path).suffix.lower()
-    return _MEDIA_TYPES.get(suffix, "image/jpeg")
+    media_type = _MEDIA_TYPES.get(suffix)
+    if media_type is None:
+        logger.warning(
+            "describe: unknown extension %r for local_path %s; "
+            "sending as image/jpeg (Anthropic may reject)",
+            suffix,
+            local_path,
+        )
+        return "image/jpeg"
+    return media_type
 
 
 def _load_bytes(media_root: Path, local_path: str) -> bytes:
@@ -315,7 +344,7 @@ def _load_bytes(media_root: Path, local_path: str) -> bytes:
 
     Raises `OSError` (a `FileNotFoundError` subclass when the file is
     missing). The orchestrator's `_recoverable_errors` tuple catches it
-    so a missing file is a per-photo failure (the operator can re-run
+    so a missing file is a per-batch failure (the operator can re-run
     `xbrain media` to repopulate), never a whole-batch abort.
     """
     return (media_root / local_path).read_bytes()
@@ -343,20 +372,6 @@ def _system_prompt(language: str) -> str:
     return load_rubric("describe-image", language=language)
 
 
-def _user_directive(batch_size: int) -> str:
-    """Plain-text directive that follows the image blocks in the user turn.
-
-    Tells the model the index range so the JSON list it emits is
-    contractually one-to-one with the input order. Indices are
-    zero-based to match Python and the rubric's example.
-    """
-    last = batch_size - 1
-    return (
-        f"Describe images 0 through {last}. "
-        "Return a JSON list with one entry per image, in the order received."
-    )
-
-
 def _extract_response_text(response: object) -> str:
     """Pull the JSON-bearing text out of a vision response, stripping fences.
 
@@ -364,8 +379,8 @@ def _extract_response_text(response: object) -> str:
     of typed blocks; only `text` blocks carry JSON. Some models wrap
     the JSON in a ```json ... ``` Markdown fence despite the rubric
     explicitly forbidding it — strip a single leading/trailing fence
-    pair so the downstream `json.loads` does not trip on a Markdown
-    artefact.
+    pair (with or without a language tag) so the downstream
+    `json.loads` does not trip on a Markdown artefact.
     """
     blocks = [b for b in getattr(response, "content", []) if getattr(b, "type", None) == "text"]
     if not blocks:
@@ -420,7 +435,9 @@ def _parse_batch_response(response: object, batch_size: int) -> list[dict]:
     with shape (list of judgments with unique indices), and
     `_validate_judgment_entry` deals with per-entry typing. Each step
     raises `ValueError` so the orchestrator's `_recoverable_errors`
-    tuple catches every shape violation as a per-batch failure.
+    tuple catches every shape violation as a per-batch failure. The
+    caller logs `response.stop_reason` alongside any `JSONDecodeError`
+    so a `max_tokens` truncation surfaces as a diagnosable cause.
     """
     text = _extract_response_text(response)
     decoded = json.loads(text)
@@ -442,7 +459,7 @@ def _apply_judgment(
     candidate: _Candidate,
     judgment: dict,
     *,
-    language: str,
+    language: SupportedLanguage,
     version: str,
     described_at: datetime,
 ) -> MediaPhotoDescribed:
@@ -496,71 +513,110 @@ def _record_batch_failure(
         )
 
 
+def _apply_batch_as_decorative_empty(
+    *,
+    batch: list[_Candidate],
+    language: SupportedLanguage,
+    version: str,
+    described_at: datetime,
+    report: DescribeReport,
+) -> None:
+    """Mark every photo in a refused batch as decorative + empty description.
+
+    The Anthropic SDK returns `stop_reason="refusal"` on safety-policy
+    refusals (identifiable faces, NSFW, ...). The rubric already says
+    "if undescribable, mark decorative with empty description" — we
+    apply that at the SDK level too, so the batch makes progress
+    instead of churning forever. Each photo transitions to
+    `MediaPhotoDescribed(is_decorative=True, description="")` and
+    counts toward `photos_described`, not `photos_failed`.
+    """
+    for candidate in batch:
+        new_entry = MediaPhotoDescribed(
+            url=candidate.entry.url,
+            local_path=candidate.entry.local_path,
+            width=candidate.entry.width,
+            height=candidate.entry.height,
+            bytes_size=candidate.entry.bytes_size,
+            downloaded_at=candidate.entry.downloaded_at,
+            is_decorative=True,
+            description="",
+            description_lang=language,
+            description_version=version,
+            described_at=described_at,
+        )
+        candidate.item.media[candidate.index] = new_entry
+        report.photos_described += 1
+
+
 def describe_all(
     items: dict[str, Item],
     media_root: Path,
     *,
     model: str,
-    output_language: str,
+    output_language: SupportedLanguage,
     description_version: str,
     force: bool = False,
     limit: int | None = None,
     items_filter: list[str] | None = None,
     batch_size: int = _DEFAULT_BATCH_SIZE,
-    client: object | None = None,
+    client: VisionClient | None = None,
     on_progress: Callable[[], None] | None = None,
     now: Callable[[], datetime] | None = None,
 ) -> DescribeReport:
     """Describe every eligible photo across the store; return a structured report.
 
     Eligibility (without `--force`):
+
     - `MediaPhotoDownloaded` — always.
-    - `MediaPhotoDescribed` whose `description_version` ≠ `description_version`
-      (the per-call argument, sourced from `config.describe_version`).
+    - `MediaPhotoDescribed` whose `description_version` ≠ the configured
+      version, OR whose `description_lang` ≠ the configured language.
+      The language check guards against mixed-language vaults that
+      would otherwise leak Spanish prose into an English run (or
+      vice-versa) via the enrich prompt.
 
     With `--force`:
+
     - Every `MediaPhotoDownloaded` AND every `MediaPhotoDescribed` is
       re-described. The persisted description is overwritten.
 
     Out of scope (every run):
-    - `MediaPhotoPending` — describing only runs over photos with bytes
-      on disk; the operator must call `xbrain media` first.
-    - `MediaPhotoFailed` — same reason.
-    - `MediaVideoPending` — photos only; vision-API support for video
-      is a separate phase.
+
+    - `MediaPhotoPending`, `MediaPhotoFailed`, `MediaVideoPending` —
+      describing only runs over photos with bytes on disk.
 
     The function mutates `items` in place; the caller is expected to
     wrap each batch transition with a store write (the `on_progress`
-    callback fires after every batch, success or failure). The
-    Ctrl-C-coherent invariant lives there: the store is written
-    between batches, never mid-API-call.
-
-    `media_root` is the directory under which `<item_id>/<index>.<ext>`
-    photo files live (typically `data/media/`).
+    callback fires after every batch). The Ctrl-C-coherent invariant
+    lives there: the store is written between batches.
 
     Raises:
-        RuntimeError: when EVERY batch attempted in the run fails. A
-            total failure (a revoked API key, an exhausted quota, a
-            corrupted media tree) must surface as a non-zero exit. The
+        RuntimeError: when EVERY batch attempted in the run fails. The
             CLI's `_handle_cli_errors` converts this into a clean
             operator message + exit code 1.
     """
     if client is None:
         from anthropic import Anthropic  # lazy: tests inject FakeAnthropic
 
-        client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
+        # reads ANTHROPIC_API_KEY from the environment; the real `Anthropic`
+        # class is structurally compatible with `VisionClient` so the cast
+        # is a documentation tool, not a runtime conversion.
+        active_client: VisionClient = Anthropic()  # type: ignore[assignment]
+    else:
+        active_client = client
     clock: Callable[[], datetime] = now if now is not None else _utcnow
 
     started = time.monotonic()
     report = DescribeReport()
     filter_set = set(items_filter) if items_filter else None
     candidates = list(
-        _iter_candidates(
+        _iter_eligible_candidates(
             items,
             force=force,
             limit=limit,
             items_filter=filter_set,
             current_version=description_version,
+            current_language=output_language,
             report=report,
         )
     )
@@ -571,10 +627,14 @@ def describe_all(
     system = _system_prompt(output_language)
     recoverable = _recoverable_errors()
 
-    for batch in _chunked(candidates, batch_size):
+    # `itertools.batched` requires Python 3.12 (declared in `pyproject.toml`).
+    # It returns tuples; the orchestrator works in lists so it can use `len`
+    # and slice freely.
+    for batch_tuple in itertools.batched(candidates, batch_size):
+        batch = list(batch_tuple)
         _run_one_batch(
             batch=batch,
-            client=client,
+            client=active_client,
             model=model,
             system=system,
             output_language=output_language,
@@ -592,13 +652,29 @@ def describe_all(
     return report
 
 
+def _user_directive_text(batch_size: int) -> dict:
+    """Build the trailing text block for the user turn — index-range directive.
+
+    Tells the model the index range so the JSON list it emits is
+    contractually one-to-one with the input order. Indices are
+    zero-based to match Python and the rubric's example.
+    """
+    return {
+        "type": "text",
+        "text": (
+            f"Describe images 0 through {batch_size - 1}. "
+            "Return a JSON list with one entry per image, in the order received."
+        ),
+    }
+
+
 def _run_one_batch(
     *,
     batch: list[_Candidate],
-    client: object,
+    client: VisionClient,
     model: str,
     system: str,
-    output_language: str,
+    output_language: SupportedLanguage,
     description_version: str,
     clock: Callable[[], datetime],
     media_root: Path,
@@ -609,6 +685,11 @@ def _run_one_batch(
 
     Pulled out of `describe_all` to keep the outer loop's complexity
     under grade C while preserving the per-batch isolation contract.
+    Refusal responses (`stop_reason="refusal"`) are converted to a
+    decorative-empty transition for each photo in the batch — that
+    satisfies the spec's "if undescribable, mark decorative" rule at
+    the SDK level so the run makes progress instead of churning.
+
     Programmer bugs (`AttributeError`, ...) and `KeyboardInterrupt`
     fall outside `recoverable` and propagate.
     """
@@ -616,14 +697,39 @@ def _run_one_batch(
     report.photos_attempted += len(batch)
     try:
         content_blocks = _build_user_blocks(batch, media_root)
-        response = _messages_create(
-            client=client,
+        response = client.messages.create(
             model=model,
             max_tokens=_MAX_TOKENS,
             system=system,
-            content_blocks=content_blocks,
+            messages=[{"role": "user", "content": content_blocks}],
         )
-        judgments = _parse_batch_response(response, batch_size=len(batch))
+        stop_reason = getattr(response, "stop_reason", None)
+        if stop_reason == "refusal":
+            logger.warning(
+                "describe: batch of %d photos refused by SDK; "
+                "marking each as decorative with empty description",
+                len(batch),
+            )
+            _apply_batch_as_decorative_empty(
+                batch=batch,
+                language=output_language,
+                version=description_version,
+                described_at=clock(),
+                report=report,
+            )
+            return
+        try:
+            judgments = _parse_batch_response(response, batch_size=len(batch))
+        except json.JSONDecodeError as exc:
+            # Surface stop_reason so a `max_tokens` truncation is
+            # diagnosable from the warning line alone — otherwise the
+            # operator gets a generic "Expecting value: line N column M".
+            logger.warning(
+                "describe: malformed JSON from vision response (stop_reason=%r): %s",
+                stop_reason,
+                exc,
+            )
+            raise
         _apply_batch_judgments(
             batch=batch,
             judgments=judgments,
@@ -667,60 +773,38 @@ def _build_user_blocks(batch: list[_Candidate], media_root: Path) -> list[dict]:
     for candidate in batch:
         data = _load_bytes(media_root, candidate.entry.local_path)
         blocks.append(_build_image_block(data, _media_type(candidate.entry.local_path)))
-    blocks.append({"type": "text", "text": _user_directive(len(batch))})
+    blocks.append(_user_directive_text(len(batch)))
     return blocks
 
 
-def _messages_create(
-    *,
-    client: object,
-    model: str,
-    max_tokens: int,
-    system: str,
-    content_blocks: list[dict],
-) -> object:
-    """Thin wrapper around the Anthropic SDK's `messages.create`.
-
-    Pulled out so the orchestrator stays readable and so tests can
-    inject a `FakeAnthropic` whose `messages.create` records kwargs.
-    Returns the raw response — the parser inspects `.content` blocks.
-    """
-    return client.messages.create(  # type: ignore[attr-defined]
-        model=model,
-        max_tokens=max_tokens,
-        system=system,
-        messages=[{"role": "user", "content": content_blocks}],
-    )
-
-
 def _apply_batch_judgments(
     *,
     batch: list[_Candidate],
     judgments: list[dict],
-    language: str,
+    language: SupportedLanguage,
     version: str,
     described_at: datetime,
     report: DescribeReport,
 ) -> None:
     """Apply per-image judgments to the batch, transitioning each entry.
 
-    Pairs each judgment with the candidate at its `index`. A judgment
-    count that does not match the batch size is a contract violation
-    (parser already enforces the index range) — if the list is short,
-    the missing candidates are tallied as per-photo failures so they
-    re-enter the candidate pool on the next run; if it is long, the
-    excess is a programmer bug (parser-side dup check rules out
-    duplicate indices already).
+    Pairs each judgment with the candidate at its `index`. The parser
+    already enforces the index range and rejects duplicates, so the
+    `by_index` dict is built directly without a paranoid re-check.
+
+    A judgment list SHORTER than the batch is a partial-data contract
+    violation: the missing candidates are tallied as per-photo failures
+    (so they re-enter the candidate pool on the next run) AND the
+    batch counts as `batches_failed += 1` even though some judgments
+    landed — the batch did not return complete data, so the operator's
+    view of "did this API call do its job" is "no, partially".
     """
     by_index = {entry["index"]: entry for entry in judgments}
-    if len(by_index) != len(judgments):
-        # Defence in depth — `_parse_batch_response` already rejects
-        # duplicate indices, so reaching here means the parser was
-        # bypassed. Raise so the developer sees it.
-        raise ValueError("internal: duplicate judgment indices survived parser check")
+    batch_had_omission = False
     for position, candidate in enumerate(batch):
         judgment = by_index.get(position)
         if judgment is None:
+            batch_had_omission = True
             report.photos_failed += 1
             report.per_item_failures.setdefault(candidate.item_id, []).append(
                 (candidate.entry.url, f"vision response omitted index {position}")
@@ -735,6 +819,11 @@ def _apply_batch_judgments(
         )
         candidate.item.media[candidate.index] = new_entry
         report.photos_described += 1
+    if batch_had_omission:
+        # A batch with missing judgments did not return complete data
+        # for the API call we made — count it as a batch failure so
+        # the "did this run net positive?" check is honest.
+        report.batches_failed += 1
 
 
 def emit_summary_line(report: DescribeReport, *, out: "io.IOBase | None" = None) -> None:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index d36d04b..1e6ec6f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -979,3 +979,204 @@ def _patched(store, media_root, **kwargs):
     result = runner.invoke(app, ["describe"])
     assert result.exit_code == 1
     assert "Error" in result.output
+
+
+def test_describe_command_emits_exactly_one_summary_on_partial_failure(tmp_path, monkeypatch):
+    """The CLI is the single source of truth for the SUMMARY line.
+
+    A partial-failure run must emit EXACTLY one `SUMMARY:` on stderr —
+    if the orchestrator regrows a second emitter the count goes to 2
+    and this pins it. Mirrors the dedup test on the orchestrator
+    side (`test_describe_all_does_not_emit_summary_on_partial_failure`).
+    """
+    import io as _io
+    import json as _json
+    from datetime import datetime, timezone
+
+    from anthropic import APIError
+    from PIL import Image
+
+    from xbrain.models import Author, Item, MediaPhotoDownloaded
+    from xbrain.store import save_store as _save_store
+
+    _setup_repo(tmp_path, monkeypatch)
+    media_root = tmp_path / "data" / "media"
+    (media_root / "1").mkdir(parents=True, exist_ok=True)
+    (media_root / "2").mkdir(parents=True, exist_ok=True)
+    buf = _io.BytesIO()
+    Image.new("RGB", (8, 6), color=(10, 20, 30)).save(buf, format="JPEG")
+    (media_root / "1" / "0.jpg").write_bytes(buf.getvalue())
+    (media_root / "2" / "0.jpg").write_bytes(buf.getvalue())
+
+    def _build_item(item_id: str) -> Item:
+        return Item(
+            id=item_id,
+            source="bookmark",
+            url=f"https://x.com/a/status/{item_id}",
+            author=Author(handle="a", name="A"),
+            text="text",
+            created_at=datetime(2026, 5, 10, tzinfo=timezone.utc),
+            captured_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            media=[
+                MediaPhotoDownloaded(
+                    url=f"https://pbs.twimg.com/media/{item_id}-0.jpg",
+                    local_path=f"{item_id}/0.jpg",
+                    width=8,
+                    height=6,
+                    bytes_size=200,
+                    downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+                )
+            ],
+        )
+
+    _save_store(
+        {"1": _build_item("1"), "2": _build_item("2")},
+        tmp_path / "data" / "items.json",
+    )
+
+    class _PartialFailMessages:
+        def __init__(self):
+            self.calls: list[dict] = []
+            self._counter = 0
+
+        def create(self, **kwargs):
+            self.calls.append(kwargs)
+            self._counter += 1
+            if self._counter == 1:
+                # First batch fails.
+                raise APIError("503", request=None, body=None)
+
+            # Second batch returns one judgment.
+            class _Block:
+                type = "text"
+
+                def __init__(self, text: str):
+                    self.text = text
+
+            class _Resp:
+                def __init__(self, text: str):
+                    self.content = [_Block(text)]
+                    self.stop_reason = "end_turn"
+
+            return _Resp(_json.dumps([{"index": 0, "is_decorative": False, "description": "ok"}]))
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _PartialFailMessages()
+
+    import xbrain.cli as cli
+
+    fake = _FakeClient()
+
+    def _patched(store, media_root, **kwargs):
+        kwargs["client"] = fake
+        kwargs["batch_size"] = 1
+        return _orig(store, media_root, **kwargs)
+
+    _orig = cli.run_describe_all
+    monkeypatch.setattr(cli, "run_describe_all", _patched)
+
+    result = runner.invoke(app, ["describe"])
+    assert result.exit_code == 0
+    # Exactly one SUMMARY emission — the CLI's `emit_summary_line` is
+    # the single source of truth; the orchestrator stays silent.
+    assert result.output.count("SUMMARY:") == 1
+
+
+def test_describe_command_verbose_lists_failed_photos(tmp_path, monkeypatch):
+    """`--verbose` prints `Failed photos:` plus per-failure rows on partial failure.
+
+    Pins the diagnostic branch in `_run_describe` so a future refactor
+    that drops the verbose output is caught.
+    """
+    import io as _io
+    import json as _json
+    from datetime import datetime, timezone
+
+    from anthropic import APIError
+    from PIL import Image
+
+    from xbrain.models import Author, Item, MediaPhotoDownloaded
+    from xbrain.store import save_store as _save_store
+
+    _setup_repo(tmp_path, monkeypatch)
+    media_root = tmp_path / "data" / "media"
+    (media_root / "1").mkdir(parents=True, exist_ok=True)
+    (media_root / "2").mkdir(parents=True, exist_ok=True)
+    buf = _io.BytesIO()
+    Image.new("RGB", (8, 6), color=(10, 20, 30)).save(buf, format="JPEG")
+    (media_root / "1" / "0.jpg").write_bytes(buf.getvalue())
+    (media_root / "2" / "0.jpg").write_bytes(buf.getvalue())
+
+    def _build_item(item_id: str) -> Item:
+        return Item(
+            id=item_id,
+            source="bookmark",
+            url=f"https://x.com/a/status/{item_id}",
+            author=Author(handle="a", name="A"),
+            text="text",
+            created_at=datetime(2026, 5, 10, tzinfo=timezone.utc),
+            captured_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            media=[
+                MediaPhotoDownloaded(
+                    url=f"https://pbs.twimg.com/media/{item_id}-0.jpg",
+                    local_path=f"{item_id}/0.jpg",
+                    width=8,
+                    height=6,
+                    bytes_size=200,
+                    downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+                )
+            ],
+        )
+
+    _save_store(
+        {"1": _build_item("1"), "2": _build_item("2")},
+        tmp_path / "data" / "items.json",
+    )
+
+    class _PartialFailMessages:
+        def __init__(self):
+            self.calls: list[dict] = []
+            self._counter = 0
+
+        def create(self, **kwargs):
+            self.calls.append(kwargs)
+            self._counter += 1
+            if self._counter == 1:
+                raise APIError("503", request=None, body=None)
+
+            class _Block:
+                type = "text"
+
+                def __init__(self, text: str):
+                    self.text = text
+
+            class _Resp:
+                def __init__(self, text: str):
+                    self.content = [_Block(text)]
+                    self.stop_reason = "end_turn"
+
+            return _Resp(_json.dumps([{"index": 0, "is_decorative": False, "description": "ok"}]))
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _PartialFailMessages()
+
+    import xbrain.cli as cli
+
+    fake = _FakeClient()
+
+    def _patched(store, media_root, **kwargs):
+        kwargs["client"] = fake
+        kwargs["batch_size"] = 1
+        return _orig(store, media_root, **kwargs)
+
+    _orig = cli.run_describe_all
+    monkeypatch.setattr(cli, "run_describe_all", _patched)
+
+    result = runner.invoke(app, ["describe", "--verbose"])
+    assert result.exit_code == 0
+    assert "Failed photos:" in result.output
+    # The failed item id (1 or 2 — order is filesystem-dependent) and the URL
+    # both appear on the verbose row.
+    assert "pbs.twimg.com" in result.output
diff --git a/tests/test_describe.py b/tests/test_describe.py
index 5072b95..d6aeaae 100644
--- a/tests/test_describe.py
+++ b/tests/test_describe.py
@@ -27,7 +27,7 @@
 
 from xbrain.describe import (
     DescribeReport,
-    _eligible,
+    _is_eligible,
     _parse_batch_response,
     _validate_judgment_entry,
     describe_all,
@@ -163,14 +163,50 @@ class _FakeListResponse:
     `json.dumps(payload)` — that path expects a dict. For describe we
     need to ship a list payload, so we build a parallel response
     type that mirrors the shape `_parse_batch_response` consumes.
+
+    `stop_reason` defaults to `"end_turn"` (the SDK's normal terminus);
+    pass `stop_reason="refusal"` to simulate a safety-policy refusal,
+    or any other string to simulate a max-tokens truncation etc.
     """
 
-    def __init__(self, judgments: list[dict]):
+    def __init__(self, judgments: list[dict], *, stop_reason: str = "end_turn"):
         import json
 
         self.content = [type(FakeBlock(payload={}))(payload={})]  # placeholder block
         # Overwrite the text on the block with the JSON list serialisation.
         self.content[0].text = json.dumps(judgments)
+        self.stop_reason = stop_reason
+
+
+class _FakeRefusalResponse:
+    """A response object simulating Anthropic SDK refusal.
+
+    `stop_reason="refusal"` is the SDK's signal that the model declined
+    to answer (identifiable face, NSFW, ...). The orchestrator must
+    detect this and convert the batch to decorative+empty rather than
+    fail it. `content` is intentionally empty — a real refusal response
+    can carry no text blocks.
+    """
+
+    def __init__(self):
+        self.content: list = []
+        self.stop_reason = "refusal"
+
+
+class _FakeTruncatedResponse:
+    """A response with `stop_reason="max_tokens"` and malformed JSON text.
+
+    Simulates the failure mode where the model hit the token cap mid-emit
+    and the SDK returns a truncated, unparseable JSON fragment. The
+    orchestrator must surface `stop_reason` in the warning log line so
+    the operator can diagnose the cause from logs alone.
+    """
+
+    def __init__(self, truncated_text: str = '[{"index": 0, "is_dec'):
+        block = type(FakeBlock(payload={}))(payload={})
+        block.text = truncated_text
+        self.content = [block]
+        self.stop_reason = "max_tokens"
 
 
 class _FakeMessagesList:
@@ -178,18 +214,25 @@ class _FakeMessagesList:
 
     Mirrors `tests.conftest.FakeMessages` but uses `_FakeListResponse`
     so the payload is a JSON list. Exception instances are raised
-    rather than wrapped (same convention).
+    rather than wrapped (same convention). A payload that is already a
+    response object (e.g. `_FakeRefusalResponse`) is returned as-is so
+    tests can simulate stop-reason variants without a wrapper.
     """
 
     def __init__(self, payloads: list):
         self._payloads = list(payloads)
         self.calls: list[dict] = []
 
-    def create(self, **kwargs) -> _FakeListResponse:
+    def create(self, **kwargs):
         self.calls.append(kwargs)
         payload = self._payloads.pop(0)
         if isinstance(payload, Exception):
             raise payload
+        # Pass pre-built response objects through unchanged so callers
+        # can ship refusal / truncation / custom-stop-reason responses
+        # without a `_FakeListResponse` wrapper around a JSON list.
+        if hasattr(payload, "content") and hasattr(payload, "stop_reason"):
+            return payload
         return _FakeListResponse(payload)
 
 
@@ -204,23 +247,43 @@ def __init__(self, payloads: list):
 
 
 def test_eligible_downloaded_always_eligible():
-    """Downloaded entries are always eligible regardless of force/version."""
+    """Downloaded entries are always eligible regardless of force/version/language."""
     entry = _downloaded()
-    assert _eligible(entry, force=False, current_version="v1") is True
-    assert _eligible(entry, force=True, current_version="v1") is True
+    assert (
+        _is_eligible(entry, force=False, current_version="v1", current_language="English") is True
+    )
+    assert _is_eligible(entry, force=True, current_version="v1", current_language="English") is True
 
 
 def test_eligible_described_current_version_only_with_force():
-    """A described entry on the current version is skipped without --force."""
+    """A described entry on the current version+language is skipped without --force."""
     entry = _described(version="v1")
-    assert _eligible(entry, force=False, current_version="v1") is False
-    assert _eligible(entry, force=True, current_version="v1") is True
+    assert (
+        _is_eligible(entry, force=False, current_version="v1", current_language="English") is False
+    )
+    assert _is_eligible(entry, force=True, current_version="v1", current_language="English") is True
 
 
 def test_eligible_described_stale_version_is_eligible():
     """A described entry on a stale version is eligible without --force."""
     entry = _described(version="v1")
-    assert _eligible(entry, force=False, current_version="v2") is True
+    assert (
+        _is_eligible(entry, force=False, current_version="v2", current_language="English") is True
+    )
+
+
+def test_eligible_described_stale_language_is_eligible():
+    """A described entry whose language drifted vs the current config is eligible.
+
+    Guards against the mixed-language vault regression: switching
+    `output_language` from English → Spanish (or vice-versa) must mark
+    previously-described entries stale on the next run so the enrich
+    prompt does not splice the wrong language into the new vault.
+    """
+    entry = _described(version="v1")  # described_lang="English" by default
+    assert (
+        _is_eligible(entry, force=False, current_version="v1", current_language="Spanish") is True
+    )
 
 
 def test_eligible_pending_failed_video_are_never_eligible():
@@ -235,8 +298,14 @@ def test_eligible_pending_failed_video_are_never_eligible():
     )
     video = MediaVideoPending(url="https://video.twimg.com/x.mp4")
     for entry in (pending, failed, video):
-        assert _eligible(entry, force=False, current_version="v1") is False
-        assert _eligible(entry, force=True, current_version="v1") is False
+        assert (
+            _is_eligible(entry, force=False, current_version="v1", current_language="English")
+            is False
+        )
+        assert (
+            _is_eligible(entry, force=True, current_version="v1", current_language="English")
+            is False
+        )
 
 
 # --------------------------------------------------------------------- parser
@@ -909,6 +978,168 @@ def test_describe_all_uses_injectable_clock(tmp_path: Path):
     assert described.described_at == fixed
 
 
+# --------------------------------------------------------------------- payload shape
+
+
+def test_describe_all_sends_payload_with_expected_kwarg_shape(tmp_path: Path):
+    """The kwargs sent to `messages.create` must match the wire contract.
+
+    A regression in the payload shape (wrong `max_tokens`, dropped
+    `system`, role flip, image-block restructure) is otherwise invisible
+    until production. Pins the structure end-to-end.
+    """
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(2)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient([[_judgment(0), _judgment(1)]])
+    describe_all(
+        store,
+        media_root,
+        model="claude-sonnet-4-6",
+        output_language="English",
+        description_version="v1",
+        batch_size=2,
+        client=client,
+    )
+    kwargs = client.messages.calls[0]
+    assert kwargs["model"] == "claude-sonnet-4-6"
+    assert kwargs["max_tokens"] == 1200
+    assert isinstance(kwargs["system"], str) and kwargs["system"]
+    messages = kwargs["messages"]
+    assert isinstance(messages, list) and len(messages) == 1
+    assert messages[0]["role"] == "user"
+    content = messages[0]["content"]
+    # 2 image blocks + exactly one trailing text directive.
+    assert len(content) == 3
+    image_blocks = content[:2]
+    for block in image_blocks:
+        assert block["type"] == "image"
+        source = block["source"]
+        assert source["type"] == "base64"
+        assert source["media_type"] == "image/jpeg"
+        assert isinstance(source["data"], str) and source["data"]
+    text_block = content[2]
+    assert text_block["type"] == "text"
+    assert "Describe images 0 through 1" in text_block["text"]
+
+
+# --------------------------------------------------------------------- refusal
+
+
+def test_describe_all_handles_refusal_as_decorative_empty(tmp_path: Path):
+    """SDK `stop_reason="refusal"` must transition every photo to decorative+empty.
+
+    The Anthropic safety policy refuses identifiable faces / NSFW; the
+    rubric says "if undescribable, mark decorative with empty
+    description". The orchestrator applies that contract at the SDK
+    level too so the batch makes progress instead of churning forever.
+    """
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(5)],
+    )
+    store = {"1": item}
+    client = _FakeVisionClient([_FakeRefusalResponse()])
+    report = describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=5,
+        client=client,
+    )
+    # All five photos must be Described as decorative + empty.
+    for media_entry in item.media:
+        assert isinstance(media_entry, MediaPhotoDescribed)
+        assert media_entry.is_decorative is True
+        assert media_entry.description == ""
+    # Refusal handling counts as described, not failed — the photo made
+    # progress out of Downloaded.
+    assert report.photos_described == 5
+    assert report.photos_failed == 0
+
+
+def test_describe_all_logs_stop_reason_on_json_decode_failure(tmp_path: Path, caplog):
+    """A `max_tokens` truncation must surface stop_reason in the warning log.
+
+    Otherwise the operator gets only "Expecting value: line N column M"
+    and cannot tell whether the cause is a truncated response, a
+    refusal, or a malformed model emission.
+    """
+    media_root = tmp_path / "media"
+    item = _item("1", [_downloaded(item_id="1", index=0, media_root=media_root)])
+    store = {"1": item}
+    client = _FakeVisionClient([_FakeTruncatedResponse()])
+    with caplog.at_level("WARNING", logger="xbrain.describe"):
+        # Total failure raises (1 batch attempted, 0 described).
+        with pytest.raises(RuntimeError):
+            describe_all(
+                store,
+                media_root,
+                model="m",
+                output_language="English",
+                description_version="v1",
+                batch_size=1,
+                client=client,
+            )
+    messages = "\n".join(record.getMessage() for record in caplog.records)
+    assert "stop_reason='max_tokens'" in messages or "stop_reason=" in messages
+
+
+# --------------------------------------------------------------------- fence variants
+
+
+def test_parse_batch_response_strips_fence_without_language_tag():
+    """Some models emit ``` ... ``` (no `json` after the opening fence)."""
+    import json
+
+    class _Fenced:
+        content = [type("B", (), {"type": "text", "text": ""})()]
+
+    fenced_text = "```\n" + json.dumps([_judgment(0)]) + "\n```"
+    _Fenced.content[0].text = fenced_text
+    out = _parse_batch_response(_Fenced(), batch_size=1)
+    assert out[0]["index"] == 0
+
+
+# --------------------------------------------------------------------- partial-data batch
+
+
+def test_describe_all_partial_judgments_count_as_batch_failed(tmp_path: Path):
+    """A batch with missing judgments must bump batches_failed (not 0).
+
+    The vision model returned 1 of 2 expected judgments. One photo
+    transitions to Described, the other stays Downloaded, and the
+    batch counts as a failure because it did NOT return complete
+    data for the API call we made.
+    """
+    media_root = tmp_path / "media"
+    item = _item(
+        "1",
+        [_downloaded(item_id="1", index=i, media_root=media_root) for i in range(2)],
+    )
+    store = {"1": item}
+    # Only one judgment for a 2-image batch.
+    client = _FakeVisionClient([[_judgment(0, description="ok")]])
+    report = describe_all(
+        store,
+        media_root,
+        model="m",
+        output_language="English",
+        description_version="v1",
+        batch_size=2,
+        client=client,
+    )
+    assert report.photos_described == 1
+    assert report.photos_failed == 1
+    assert report.batches_failed == 1
+
+
 # --------------------------------------------------------------------- summary line
 
 

From a373eff8814496655686228723fc9b5b0a5b59b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 20:40:12 +0200
Subject: [PATCH 10/11] [#34] inline _user_directive_text into
 _build_user_blocks

Single-caller helper merged into its only caller. Removes the
last surviving gold-plating flagged by code-simplifier in round 1
of the Phase B review pipeline.
---
 src/xbrain/describe.py | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/xbrain/describe.py b/src/xbrain/describe.py
index fd7d675..6bf6e36 100644
--- a/src/xbrain/describe.py
+++ b/src/xbrain/describe.py
@@ -652,22 +652,6 @@ def describe_all(
     return report
 
 
-def _user_directive_text(batch_size: int) -> dict:
-    """Build the trailing text block for the user turn — index-range directive.
-
-    Tells the model the index range so the JSON list it emits is
-    contractually one-to-one with the input order. Indices are
-    zero-based to match Python and the rubric's example.
-    """
-    return {
-        "type": "text",
-        "text": (
-            f"Describe images 0 through {batch_size - 1}. "
-            "Return a JSON list with one entry per image, in the order received."
-        ),
-    }
-
-
 def _run_one_batch(
     *,
     batch: list[_Candidate],
@@ -773,7 +757,18 @@ def _build_user_blocks(batch: list[_Candidate], media_root: Path) -> list[dict]:
     for candidate in batch:
         data = _load_bytes(media_root, candidate.entry.local_path)
         blocks.append(_build_image_block(data, _media_type(candidate.entry.local_path)))
-    blocks.append(_user_directive_text(len(batch)))
+    # Trailing text directive: tell the model the index range so its JSON
+    # list is contractually one-to-one with the input order. Indices are
+    # zero-based to match Python and the rubric's example.
+    blocks.append(
+        {
+            "type": "text",
+            "text": (
+                f"Describe images 0 through {len(batch) - 1}. "
+                "Return a JSON list with one entry per image, in the order received."
+            ),
+        }
+    )
     return blocks
 
 

From 4c09ffffc3dd8586acb7613fd1392de0dd3fd4b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Victor=20Gonz=C3=A1lez-Pacheco?= <victor@fitizens.io>
Date: Sun, 24 May 2026 20:41:34 +0200
Subject: [PATCH 11/11] [#34] add negative tests for MediaPhotoDescribed
 invariants + media_type warning

Closes 3 flags from the verify round of the Phase B review pipeline:

- test_models.py: 3 negative tests pin the model-level invariants on
  MediaPhotoDescribed (UTC-aware described_at, SUPPORTED_LANGUAGES
  membership for description_lang, decorative => empty description).
- test_describe.py: 1 unit test pins _media_type's unknown-extension
  fallback to image/jpeg AND the operator-visible logger.warning, so
  a silent regression on either side is caught.

Test count: 502 -> 506. Quality gate green.
---
 tests/test_describe.py | 19 +++++++++
 tests/test_models.py   | 87 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/tests/test_describe.py b/tests/test_describe.py
index d6aeaae..f5b44eb 100644
--- a/tests/test_describe.py
+++ b/tests/test_describe.py
@@ -1026,6 +1026,25 @@ def test_describe_all_sends_payload_with_expected_kwarg_shape(tmp_path: Path):
     assert "Describe images 0 through 1" in text_block["text"]
 
 
+def test_media_type_warns_on_unknown_extension(caplog):
+    """Unknown extensions fall back to image/jpeg AND emit a `logger.warning`.
+
+    The downloader only writes `.jpg`/`.jpeg`/`.png`/`.webp`, so an unknown
+    suffix means either a hand-edited `items.json` or a future format the
+    code does not register. The warning is the operator's only signal that
+    the wrong MIME type was sent to Anthropic — pin it so a silent fallback
+    cannot regress.
+    """
+    import logging
+
+    from xbrain.describe import _media_type
+
+    with caplog.at_level(logging.WARNING, logger="xbrain.describe"):
+        result = _media_type("123/0.gif")
+    assert result == "image/jpeg"
+    assert any("unknown extension" in rec.message.lower() for rec in caplog.records)
+
+
 # --------------------------------------------------------------------- refusal
 
 
diff --git a/tests/test_models.py b/tests/test_models.py
index 8df67c9..7a80397 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -408,6 +408,93 @@ def test_media_photo_described_rejects_parent_traversal_local_path():
         )
 
 
+def test_media_photo_described_rejects_naive_described_at():
+    """`described_at` must be timezone-aware (UTC); a naive datetime is rejected.
+
+    The UTC-aware invariant is enforced by a field validator so a
+    hand-edited `items.json` entry cannot smuggle a local-time datetime
+    past the type boundary. Naive timestamps cause downstream UTC math
+    (eligibility checks, sort orders) to drift silently.
+    """
+    import pytest
+    from pydantic import ValidationError
+
+    from xbrain.models import MediaPhotoDescribed
+
+    with pytest.raises(ValidationError):
+        MediaPhotoDescribed(
+            url="https://pbs.twimg.com/media/X.jpg",
+            local_path="123/0.jpg",
+            width=10,
+            height=10,
+            bytes_size=100,
+            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            is_decorative=False,
+            description="hello",
+            description_lang="English",
+            description_version="v1",
+            described_at=datetime(2026, 5, 24),  # naive — must fail
+        )
+
+
+def test_media_photo_described_rejects_unsupported_description_lang():
+    """`description_lang` must be in `SUPPORTED_LANGUAGES`; others are rejected.
+
+    The type alias is derived from `i18n.SUPPORTED_LANGUAGES` so the
+    `Literal[...]` validator rejects unknown languages at construction.
+    Prevents an out-of-band language tag from polluting the vault.
+    """
+    import pytest
+    from pydantic import ValidationError
+
+    from xbrain.models import MediaPhotoDescribed
+
+    with pytest.raises(ValidationError):
+        MediaPhotoDescribed(
+            url="https://pbs.twimg.com/media/X.jpg",
+            local_path="123/0.jpg",
+            width=10,
+            height=10,
+            bytes_size=100,
+            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            is_decorative=False,
+            description="hello",
+            description_lang="Klingon",  # not in SUPPORTED_LANGUAGES
+            description_version="v1",
+            described_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        )
+
+
+def test_media_photo_described_rejects_decorative_with_nonempty_description():
+    """`is_decorative=True` implies `description == ""` — model-validator enforces.
+
+    Defence-in-depth for hand-edited records: the producer
+    (`describe._apply_judgment`) already blanks the description on
+    decorative judgments, but a hand-written entry that violates the
+    invariant must still be rejected at the type boundary so downstream
+    callers can rely on `is_decorative => not description` unconditionally.
+    """
+    import pytest
+    from pydantic import ValidationError
+
+    from xbrain.models import MediaPhotoDescribed
+
+    with pytest.raises(ValidationError):
+        MediaPhotoDescribed(
+            url="https://pbs.twimg.com/media/X.jpg",
+            local_path="123/0.jpg",
+            width=10,
+            height=10,
+            bytes_size=100,
+            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+            is_decorative=True,
+            description="should be empty when decorative",  # violates invariant
+            description_lang="English",
+            description_version="v1",
+            described_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
+        )
+
+
 def test_media_discriminator_rejects_unknown_kind():
     """Silently inventing a variant would mask data corruption — reject loudly."""
     import pytest