diff --git a/README.md b/README.md
index c5ca49c..39198a0 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ is the central selection risk — fewer, better candidates beat more), and
 import ir
 
 # Define a corpus, build the index (incremental), then discover:
-source = ir.CorpusSource.from_skills()       # or from_packages(), from_md_reports(), from_files(...)
+source = ir.CorpusSource.from_skills()       # or from_packages(), from_md_reports(),
+                                              # from_claude_sessions(), from_files(...)
 corpus = ir.build(source)                     # embed + persist under XDG dirs
 result = ir.discover(corpus, "how do I deploy the app to the server")
 
@@ -242,8 +243,11 @@ ir search skills "deploy the app"        # rank candidates (retrieval only)
 ir discover skills "deploy the app"      # retrieve -> select
 ir discover skills "deploy the app" --disclose       # + load bodies
 ir discover skills "deploy the app" --min-score auto # + calibrated abstention
+ir build sessions                        # index recent Claude Code sessions (turn pairs)
+ir search sessions "numpy abi error" --mode lexical   # find past sessions
 ir ls                                    # list corpora + record counts
-ir info skills                           # config, stats, calibrated floors
+ir info skills                           # config, stats, policy, calibrated floors
+ir maintain --all                        # run due background work (idempotent; cron-friendly)
 ir register notes files --root ~/notes --pattern '.*\.md$'  # register a custom corpus
 ir rm notes                              # unregister (keeps built data)
 ir eval-gen skills skills_eval.jsonl     # generate eval cases (needs oa/LLM)
diff --git a/ir/__init__.py b/ir/__init__.py
index f9a43c1..2595670 100644
--- a/ir/__init__.py
+++ b/ir/__init__.py
@@ -45,7 +45,15 @@
     default_edge_extractor,
 )
 from .index import Corpus, build, open_corpus
-from .registry import retriever_for, retrievers
+from .maintenance import MaintenanceResult, maintain, maintain_corpus
+from .policy import (
+    MaintenancePolicy,
+    ReindexPolicy,
+    SynopsisPolicy,
+    default_policy_for_kind,
+    resolve_policy,
+)
+from .registry import policy_for, retriever_for, retrievers
 from .retrieve import Retriever, as_retriever, fuse_hits, records_for_artifact
 from .retrieve import search as _search
 from .select import (
@@ -58,7 +66,14 @@
 )
 from .sources import CorpusSource
 from .store import CorpusStore
-from .strategy import Chunked, IndexingStrategy, Package, Skill, WholeText
+from .strategy import (
+    Chunked,
+    ClaudeTurn,
+    IndexingStrategy,
+    Package,
+    Skill,
+    WholeText,
+)
 from .synopsis import Synthesizer, make_llm_synthesizer, with_synopsis
 from .traverse import WalkPolicy, WalkState, collapsed_tree_policy, traverse
 
@@ -73,6 +88,7 @@
     "Chunked",
     "Skill",
     "Package",
+    "ClaudeTurn",
     "with_synopsis",
     "make_llm_synthesizer",
     "Synthesizer",
@@ -114,6 +130,15 @@
     "register",
     "corpora",
     "build_corpus",
+    "maintain",
+    "maintain_corpus",
+    "MaintenanceResult",
+    "MaintenancePolicy",
+    "ReindexPolicy",
+    "SynopsisPolicy",
+    "resolve_policy",
+    "default_policy_for_kind",
+    "policy_for",
 ]
 
 register = registry.register
@@ -148,6 +173,6 @@ def search(corpus, query, **kwargs):
 # The evaluation harness is reachable as ``ir.eval`` (its ``ef`` imports are
 # lazy, so this does not weigh down ``import ir``). Kept out of ``__all__`` so a
 # star-import does not shadow the ``eval`` builtin. ``ir.eval_gen`` is the
-# build-time case generator (its ``oa`` import is lazy too).
+# build-time case generator (its ``aix`` import is lazy too).
 from . import eval  # noqa: E402,F401  (submodule attribute: ir.eval)
 from . import eval_gen  # noqa: E402,F401  (submodule attribute: ir.eval_gen)
diff --git a/ir/__main__.py b/ir/__main__.py
index 9d73db7..1fabd92 100644
--- a/ir/__main__.py
+++ b/ir/__main__.py
@@ -10,7 +10,28 @@ def main():
     import argh
 
     parser = argh.ArghParser()
-    argh.add_commands(parser, COMMANDS)
+    # argh >= 0.30 requires an explicit name-mapping policy as soon as a command
+    # has an *optional positional* (e.g. ``maintain(name=None, ...)``).
+    # ``BY_NAME_IF_KWONLY`` keeps positional params positional and maps
+    # keyword-only params to options — exactly ir's existing command convention,
+    # so it changes nothing for the other commands. Fall back gracefully on
+    # older argh that lacks the policy.
+    try:
+        policy = argh.NameMappingPolicy.BY_NAME_IF_KWONLY
+    except AttributeError:
+        try:
+            from argh.assembling import NameMappingPolicy
+
+            policy = NameMappingPolicy.BY_NAME_IF_KWONLY
+        except ImportError:
+            policy = None
+    try:
+        if policy is not None:
+            argh.add_commands(parser, COMMANDS, name_mapping_policy=policy)
+        else:
+            argh.add_commands(parser, COMMANDS)
+    except TypeError:  # very old argh without the kwarg
+        argh.add_commands(parser, COMMANDS)
     parser.dispatch()
 
 
diff --git a/ir/cli.py b/ir/cli.py
index 9a7b8b5..75cb201 100644
--- a/ir/cli.py
+++ b/ir/cli.py
@@ -11,7 +11,7 @@
     ir info packages                # config + stats for a corpus
     ir register notes files --root ~/notes --pattern '.*\\.md$'
     ir rm notes                     # unregister (keeps built data)
-    ir eval-gen skills skills_eval.jsonl --k 5        # generate cases (needs oa/LLM)
+    ir eval-gen skills skills_eval.jsonl --k 5        # generate cases (needs aix/LLM)
     ir eval skills skills_eval.jsonl --mode hybrid    # score retrieval on a case file
     ir eval-select skills skills_eval.jsonl           # score the selection stage
     ir sweep-select skills skills_eval.jsonl          # tune max_k/rel for the selector
@@ -63,14 +63,38 @@ def ls():
     return "\n".join(lines)
 
 
-def register(name, kind, *, root=None, pattern=None, embedder="default"):
-    """Register a named corpus. kind: skills | packages | reports | files."""
+def register(
+    name,
+    kind,
+    *,
+    root=None,
+    pattern=None,
+    embedder="default",
+    reindex_on=None,
+    every_hours=None,
+    synopsis=False,
+):
+    """Register a named corpus. kind: skills | packages | reports | files.
+
+    Background-work policy (optional; smart per-kind defaults otherwise — see
+    `ir.policy`): reindex_on (source-change | interval | manual), every_hours (for
+    interval), synopsis (enable LLM synopses, run only in the policy's downtime
+    window by `ir maintain`).
+    """
     params = {}
     if root:
         params["root"] = root
     if pattern:
         params["pattern"] = pattern
-    registry.register(name, kind, embedder=embedder, **params)
+    maintenance = None
+    if reindex_on or every_hours or synopsis:
+        reindex = {}
+        if every_hours:
+            reindex = {"on": "interval", "every_hours": float(every_hours)}
+        elif reindex_on:
+            reindex = {"on": reindex_on}
+        maintenance = {"reindex": reindex, "synopsis": {"enabled": bool(synopsis)}}
+    registry.register(name, kind, embedder=embedder, maintenance=maintenance, **params)
     return f"registered {name!r} (kind={kind}, embedder={embedder})"
 
 
@@ -158,18 +182,45 @@ def discover(
 
 
 def info(name):
-    """Show a corpus's stored config, stats, and any calibrated abstention floors."""
+    """Show a corpus's stored config, stats, policy, and any abstention floors."""
     corpus = open_corpus(name)
     cfg = corpus.store.get_config()
     reg = registry.get(name)
     calibrated = corpus.store.calibration_modes()
     floors = {m: corpus.store.get_calibration(m).get("min_score") for m in calibrated}
     cal = f"\nmin_score floors: {floors}" if floors else ""
+    pol = registry.policy_for(name)
+    state = corpus.store.get_maintenance_state()
+    last = state.get("last_maintained", "never")
+    syn = pol.synopsis
+    syn_str = f"enabled, scope={syn.scope}/{syn.window_days}d" if syn.enabled else "off"
+    window = f", downtime={syn.downtime_hours}" if syn.downtime_hours else ""
+    policy_str = (
+        f"\npolicy: reindex={pol.reindex.on}"
+        + (f"/{pol.reindex.every_hours}h" if pol.reindex.every_hours else "")
+        + f", synopsis={syn_str}{window}\nlast maintained: {last}"
+    )
     return (
-        f"name: {name}\nregistered: {reg}\nrecords: {len(corpus)}\nconfig: {cfg}{cal}"
+        f"name: {name}\nregistered: {reg}\nrecords: {len(corpus)}\n"
+        f"config: {cfg}{policy_str}{cal}"
     )
 
 
+def maintain(name=None, *, all=False, dry_run=False):
+    """Run due background work: incremental reindex, synopsis in its downtime window.
+
+    With a name, maintains that corpus; with --all (or no name), every registered
+    corpus. Idempotent and safe to schedule (cron/launchd): it no-ops what is not
+    due. --dry-run reports what would run without doing it.
+    """
+    from .maintenance import maintain as _maintain
+
+    results = _maintain(name=name, all=all, dry_run=dry_run)
+    if not results:
+        return "no corpora registered"
+    return "\n".join(str(r) for r in results)
+
+
 def rm(name):
     """Unregister a corpus (does not delete its built data)."""
     registry.unregister(name)
@@ -202,12 +253,12 @@ def eval(name, cases, *, mode="hybrid", k=10):
 
 
 def eval_gen(name, out, *, k=5, abstention_frac=0.15, max_artifacts=None):
-    """Generate an eval-case file for a corpus by back-translation (needs oa/LLM).
+    """Generate an eval-case file for a corpus by back-translation (needs aix/LLM).
 
     Writes a DiscoveryCase JSONL set (gold cases + an abstention slice) for the
     registered corpus *name* to *out*, stamping a corpus-signature into the
     header so the frozen file can be checked against the live corpus later. This
-    command calls an LLM via oa; scoring it afterwards (`ir eval`) is offline.
+    command calls an LLM via aix; scoring it afterwards (`ir eval`) is offline.
     """
     from .eval import save_cases
     from .eval_gen import build_eval_set, corpus_signature
@@ -375,6 +426,7 @@ def calibrate_min_score(
     search,
     discover,
     info,
+    maintain,
     rm,
     eval,
     eval_gen,
diff --git a/ir/eval_gen.py b/ir/eval_gen.py
index 5922b0e..5cf521d 100644
--- a/ir/eval_gen.py
+++ b/ir/eval_gen.py
@@ -19,9 +19,9 @@
 The LLM is **injected** (`query_generator` / `abstention_generator` callables),
 so the generation *logic* — masking, gold assignment, the leakage guard, the
 abstention fraction — is fully testable with a deterministic stub and no network.
-The default generators are built lazily on :mod:`oa` (`oa.prompt_function`), so
-``import ir.eval_gen`` stays cheap and offline; ``oa`` is only imported when you
-actually generate with the real LLM.
+The default generators are built lazily on :mod:`aix` (`aix.prompt_func`, the
+multi-provider LLM facade), so ``import ir.eval_gen`` stays cheap and offline;
+``aix`` is only imported when you actually generate with the real LLM.
 
 The output is plain :class:`~ir.eval.DiscoveryCase` data — freeze it with
 :func:`ir.eval.save_cases` (stamping :func:`corpus_signature` into the
@@ -34,7 +34,7 @@
     from ir import eval_gen as eg
 
     source = ir.CorpusSource.from_skills()
-    cases = eg.build_eval_set(source, k=5, corpus_name="skills")   # uses oa
+    cases = eg.build_eval_set(source, k=5, corpus_name="skills")   # uses aix
     from ir.eval import save_cases
     save_cases(cases, "skills_eval.jsonl",
                meta={"corpus": "skills", "corpus_signature": eg.corpus_signature(source)})
@@ -173,7 +173,7 @@ def _leaks_name(text: str, name: str) -> bool:
 
 
 # =========================================================================== #
-# Default (oa-backed) generators — lazily built, only when actually used
+# Default (aix-backed) generators — lazily built, only when actually used
 # =========================================================================== #
 
 
@@ -199,13 +199,13 @@ def _parse_lines(text: Any) -> list[str]:
     return lines
 
 
-def make_oa_query_generator(
+def make_default_query_generator(
     *, prompt: str = BACKTRANSLATION_PROMPT, **prompt_function_kwargs: Any
 ) -> QueryGenerator:
-    """Build the default back-translation generator on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default back-translation generator on :mod:`aix` (lazy import)."""
+    import aix
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="backtranslate", **prompt_function_kwargs
     )
 
@@ -215,13 +215,13 @@ def generate(description: str, *, n: int) -> list[str]:
     return generate
 
 
-def make_oa_abstention_generator(
+def make_default_abstention_generator(
     *, prompt: str = ABSTENTION_PROMPT, **prompt_function_kwargs: Any
 ) -> AbstentionGenerator:
-    """Build the default abstention generator on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default abstention generator on :mod:`aix` (lazy import)."""
+    import aix
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="abstention", **prompt_function_kwargs
     )
 
@@ -287,7 +287,7 @@ def generate_cases(
         mask_names: scrub the artifact name from the description before
             generating, and drop any generated intent that still contains it.
         query_generator: ``(description, *, n) -> [intent, …]``. Defaults to the
-            :mod:`oa`-backed back-translator (built lazily; needs a model).
+            :mod:`aix`-backed back-translator (built lazily; needs a model).
         describe: ``raw -> description`` (default: the ``description`` / ``text``
             field, else the joined string fields).
         min_chars: skip artifacts whose description is shorter than this.
@@ -304,7 +304,7 @@ def generate_cases(
     """
     if k < 1:
         raise ValueError(f"k must be >= 1, got {k!r}.")
-    gen = query_generator or make_oa_query_generator()
+    gen = query_generator or make_default_query_generator()
     describe = describe or _default_describe
     cases: list[DiscoveryCase] = []
     skipped = 0
@@ -368,7 +368,7 @@ def generate_abstention_cases(
     """Generate ``n`` abstention cases — out-of-scope intents (empty ``gold``)."""
     if n <= 0:
         return []
-    gen = generator or make_oa_abstention_generator()
+    gen = generator or make_default_abstention_generator()
     intents = gen(n=n, theme=theme)
     cases = [
         DiscoveryCase(
diff --git a/ir/formulate.py b/ir/formulate.py
index 1ed5590..a4a6f66 100644
--- a/ir/formulate.py
+++ b/ir/formulate.py
@@ -17,7 +17,7 @@
 lives in the agent layer (``raglab``), not here.
 
 :func:`make_llm_formulator` mirrors :func:`ir.select.make_llm_selector`: an
-injectable ``rewriter`` callable, built lazily on :mod:`oa` when omitted (so
+injectable ``rewriter`` callable, built lazily on :mod:`aix` when omitted (so
 importing ir stays offline), falling back to identity on any failure — a
 formulator must never make retrieval *worse* than the raw query.
 """
@@ -47,13 +47,13 @@ def identity_formulator(query: str) -> str:
 
 
 def _default_llm_rewriter(prompt: str, n: int, **prompt_function_kwargs: Any):
-    """Build the default LLM rewriter on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default LLM rewriter on :mod:`aix` (lazy import)."""
+    import aix
 
     def _parse_lines(text: str) -> list[str]:
         return [line.strip(" -\t") for line in str(text).splitlines() if line.strip()]
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="formulate_queries", **prompt_function_kwargs
     )
 
@@ -74,8 +74,8 @@ def make_llm_formulator(
     """An LLM-backed :data:`Formulator` (rewrite / expand / multi-query).
 
     ``rewriter`` is an injectable ``query -> str | [str, ...]`` callable (a test
-    double, or your own router); when omitted it is built lazily on :mod:`oa`
-    (``oa.prompt_function``), so importing this module stays offline. ``n`` is the
+    double, or your own router); when omitted it is built lazily on :mod:`aix`
+    (``aix.prompt_func``), so importing this module stays offline. ``n`` is the
     multi-query fan-out width. Any error or empty reply falls back to ``fallback``
     (default: :func:`identity_formulator`).
     """
diff --git a/ir/index.py b/ir/index.py
index bc27b82..780f0e9 100644
--- a/ir/index.py
+++ b/ir/index.py
@@ -73,6 +73,37 @@ def _embed_batched(emb, texts, input_type, batch_size):
     return np.vstack(out) if out else np.zeros((0, 0), dtype=np.float32)
 
 
+class _LazyEmbedder:
+    """An embedder callable that resolves its model on first *call*, not first use.
+
+    :func:`open_corpus` binds this as a :class:`Corpus`'s ``embedder`` so the
+    (heavy, ~seconds) embedding-model load is paid only when a query is actually
+    embedded — ``ls`` / ``info`` and lexical-only search, which never embed,
+    never trigger it. The corpus's ``embedder_id`` is read from stored config, so
+    it is known without resolving the model. Transparent: it forwards ``__call__``
+    (including ``input_type=``) to the resolved embedder, so every existing call
+    site (``_embed`` and its ``TypeError`` fallback) works unchanged.
+    """
+
+    def __init__(self, spec: Any):
+        self._spec = spec
+        self._resolved: Callable | None = None
+        self._resolved_id: str | None = None
+
+    def _resolve(self) -> Callable:
+        if self._resolved is None:
+            self._resolved, self._resolved_id = make_embedder(self._spec)
+        return self._resolved
+
+    def __call__(self, texts, **kwargs):
+        return self._resolve()(texts, **kwargs)
+
+    @property
+    def embedder_id(self) -> str:
+        self._resolve()
+        return self._resolved_id or "custom"
+
+
 @dataclass
 class Corpus:
     """A built, queryable corpus: a store plus its embedder."""
@@ -222,9 +253,16 @@ def build(
 
 
 def open_corpus(name: str, *, embedder: Any = None) -> Corpus:
-    """Reopen a previously built corpus by name (resolves its embedder)."""
+    """Reopen a previously built corpus by name.
+
+    The embedding model is **lazily** resolved (see :class:`_LazyEmbedder`): the
+    returned corpus knows its ``embedder_id`` from stored config immediately, but
+    only loads the model when a dense/hybrid query actually embeds. So ``ir ls``,
+    ``ir info``, and lexical-only search open a corpus without the model-load
+    cost. Pass ``embedder=`` to override the stored spec.
+    """
     store = CorpusStore.local(name)
     cfg = store.get_config()
     spec = embedder if embedder is not None else cfg.get("embedder_spec", "default")
-    emb, emb_id = make_embedder(spec)
-    return Corpus(name, store, emb, cfg.get("embedder_id", emb_id))
+    emb_id = cfg.get("embedder_id") or ""
+    return Corpus(name, store, _LazyEmbedder(spec), emb_id)
diff --git a/ir/maintenance.py b/ir/maintenance.py
new file mode 100644
index 0000000..b481b69
--- /dev/null
+++ b/ir/maintenance.py
@@ -0,0 +1,136 @@
+"""Idempotent background-work runner — ``ir maintain`` (issue #58).
+
+The *executing* half of ir's maintenance story (the declarative half is
+:mod:`ir.policy`). :func:`maintain` reads each corpus's resolved
+:class:`~ir.policy.MaintenancePolicy` and does the work that is **due**:
+
+- an **incremental rebuild** when reindex is due (``source-change`` is always
+  due — the build is a near-no-op when nothing changed; ``interval`` only when
+  stale; ``manual`` never);
+- when ``synopsis.enabled``, the rebuild's strategy is wrapped in
+  :func:`ir.with_synopsis`, so new / changed artifacts gain an LLM synopsis —
+  and because that build may call an LLM, it is run **only inside the policy's
+  downtime window**.
+
+It is safe to call as often as a scheduler likes: it no-ops when nothing is due,
+and records ``last_maintained`` so interval policies converge. ir runs the work;
+*scheduling* it is external — a cron / launchd entry calls ``ir maintain --all``
+every N minutes, and the downtime window lives in the policy (data), not in ir.
+
+    # crontab: try the queue every 15 minutes; ir decides what is actually due.
+    */15 * * * *  ir maintain --all >> ~/.cache/ir/maintain.log 2>&1
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, replace as dc_replace
+from datetime import datetime
+from typing import Any
+
+from . import registry
+from .index import build, open_corpus
+from .policy import in_downtime, is_reindex_due, resolve_policy
+
+
+@dataclass
+class MaintenanceResult:
+    """What :func:`maintain_corpus` did (or would do) for one corpus."""
+
+    name: str
+    ran: bool
+    reason: str
+    reindex: bool = False
+    synopsis: bool = False
+    records: int | None = None
+
+    def __str__(self) -> str:
+        verb = "maintained" if self.ran else "skipped"
+        bits = [b for b, on in (("reindex", self.reindex), ("synopsis", self.synopsis)) if on]
+        what = "+".join(bits) or "-"
+        recs = f", {self.records} records" if self.records is not None else ""
+        return f"{self.name}: {verb} [{what}] ({self.reason}){recs}"
+
+
+def _now(now: datetime | None) -> datetime:
+    # Local clock: the downtime window is expressed in local hours, and interval
+    # math stays consistent by using the same clock for both. Injectable for tests.
+    return now if now is not None else datetime.now()
+
+
+def _parse_iso(s: Any) -> datetime | None:
+    if not s:
+        return None
+    try:
+        return datetime.fromisoformat(s)
+    except (TypeError, ValueError):
+        return None
+
+
+def _synopsis_wrapped(source):
+    """Return *source* with its strategy wrapped in :func:`ir.with_synopsis`."""
+    from .synopsis import with_synopsis
+
+    return dc_replace(source, indexing_strategy=with_synopsis(source.indexing_strategy))
+
+
+def maintain_corpus(
+    name: str, *, now: datetime | None = None, dry_run: bool = False, full: bool = True
+) -> MaintenanceResult:
+    """Do the due background work for one corpus (idempotent).
+
+    Reads the corpus's resolved policy and its ``last_maintained`` time, decides
+    whether a (synopsis-aware) reindex is due *and* permitted now, and — unless
+    ``dry_run`` — runs the incremental build and records the run.
+    """
+    now = _now(now)
+    policy = resolve_policy(registry.get(name))
+    store = open_corpus(name).store  # lazy: opening never loads the model
+    last = _parse_iso(store.get_maintenance_state().get("last_maintained"))
+
+    due = is_reindex_due(policy, last, now)
+    synopsis_on = policy.synopsis.enabled
+    # A synopsis build may hit an LLM, so it is confined to the downtime window.
+    blocked = synopsis_on and not in_downtime(policy, now)
+
+    if not due or blocked:
+        reason = (
+            "synopsis build deferred to downtime window"
+            if due and blocked
+            else f"not due ({policy.reindex.on})"
+        )
+        return MaintenanceResult(name, False, reason, reindex=due, synopsis=synopsis_on)
+
+    if dry_run:
+        return MaintenanceResult(
+            name, False, "dry-run (would reindex)", reindex=True, synopsis=synopsis_on
+        )
+
+    source = registry.source_for(name)
+    if synopsis_on:
+        source = _synopsis_wrapped(source)
+    built = build(source, full=full)
+    built.store.set_maintenance_state(
+        {"last_maintained": now.isoformat(), "synopsis_enabled": synopsis_on}
+    )
+    return MaintenanceResult(
+        name, True, "reindexed", reindex=True, synopsis=synopsis_on, records=len(built)
+    )
+
+
+def maintain(
+    name: str | None = None,
+    *,
+    all: bool = False,
+    now: datetime | None = None,
+    dry_run: bool = False,
+) -> list[MaintenanceResult]:
+    """Run due background work for one corpus (*name*) or every registered one (*all*).
+
+    With neither, defaults to all registered corpora. Returns one
+    :class:`MaintenanceResult` per corpus considered.
+    """
+    if name and not all:
+        names = [name]
+    else:
+        names = list(registry.registered())
+    return [maintain_corpus(n, now=now, dry_run=dry_run) for n in names]
diff --git a/ir/policy.py b/ir/policy.py
new file mode 100644
index 0000000..59582b7
--- /dev/null
+++ b/ir/policy.py
@@ -0,0 +1,243 @@
+"""Per-corpus policy — how a corpus is segmented/stored and what background work it gets.
+
+This is the *declarative* half of ir's maintenance story (issue #58): a corpus's
+policy lives as **data** in its registry entry, and ir resolves an effective
+policy by layering ``entry`` over per-``kind`` defaults over a global default.
+An idempotent :func:`ir.maintenance.maintain` reads the policy and does the due
+work; *scheduling* that work (cron / launchd, or an orchestration layer's budget
+governor) stays **external** — ir never imports an orchestration layer, and there
+is no global ``Settings`` singleton (policy is per-corpus data, injected, not a
+process-wide mutable). The light path stays a one-liner: every field has a smart
+default, so ``ir build skills`` needs no policy at all.
+
+The three policy axes:
+
+- **reindex** — *when* to rebuild: ``"source-change"`` (default; rebuild is a
+  near-no-op when nothing changed, so it is always safe to run), ``"interval"``
+  (only when older than ``every_hours``), or ``"manual"`` (never automatic).
+- **synopsis** — *whether/when* to attach LLM synopses (the expensive,
+  off-by-default work): only the ``recent`` slice, only during ``downtime_hours``.
+  Synopsis is realized as a strategy wrapper (:func:`ir.with_synopsis`), so it is
+  incremental — only new/changed artifacts are synthesized.
+- **storage** — the persistence backend. Today only ``"local"`` (the file store);
+  a ``vd.Collection`` backend is the documented future seam (issue #28).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field, replace
+from datetime import datetime, timedelta
+from typing import Any
+
+# --------------------------------------------------------------------------- #
+# Policy data model (immutable; pure data — no I/O, no model loading)
+# --------------------------------------------------------------------------- #
+
+#: Accepted ``reindex.on`` triggers.
+REINDEX_TRIGGERS = ("source-change", "interval", "manual")
+
+#: Storage backends ir can resolve today. ``"vd"`` (vd.Collection) is the
+#: documented future backend, gated on issue #28.
+STORAGE_BACKENDS = ("local",)
+
+
+@dataclass(frozen=True)
+class ReindexPolicy:
+    """When to (incrementally) rebuild a corpus."""
+
+    on: str = "source-change"
+    every_hours: float | None = None  # used only when ``on == "interval"``
+
+    def __post_init__(self):
+        if self.on not in REINDEX_TRIGGERS:
+            raise ValueError(
+                f"unknown reindex trigger {self.on!r}; expected {REINDEX_TRIGGERS}"
+            )
+
+    @classmethod
+    def from_dict(cls, d: dict | None) -> "ReindexPolicy":
+        d = d or {}
+        return cls(
+            on=d.get("on", "source-change"),
+            every_hours=d.get("every_hours"),
+        )
+
+    def to_dict(self) -> dict:
+        out: dict[str, Any] = {"on": self.on}
+        if self.every_hours is not None:
+            out["every_hours"] = self.every_hours
+        return out
+
+
+@dataclass(frozen=True)
+class SynopsisPolicy:
+    """Whether/when to attach (expensive, LLM-generated) synopses.
+
+    ``downtime_hours`` is a ``[start, end)`` pair of local-clock hours
+    (wrapping past midnight is allowed, e.g. ``(22, 6)``); ``None`` means
+    "any time". ``scope="recent"`` limits synthesis to artifacts whose
+    timestamp is within ``window_days`` (corpora that expose a time signal);
+    ``scope="all"`` synthesizes every artifact (bounded by incrementality).
+    """
+
+    enabled: bool = False
+    scope: str = "recent"  # "recent" | "all"
+    window_days: int = 30
+    downtime_hours: tuple[int, int] | None = None
+
+    @classmethod
+    def from_dict(cls, d: dict | None) -> "SynopsisPolicy":
+        d = d or {}
+        dh = d.get("downtime_hours")
+        return cls(
+            enabled=bool(d.get("enabled", False)),
+            scope=d.get("scope", "recent"),
+            window_days=int(d.get("window_days", 30)),
+            downtime_hours=(int(dh[0]), int(dh[1])) if dh else None,
+        )
+
+    def to_dict(self) -> dict:
+        out: dict[str, Any] = {
+            "enabled": self.enabled,
+            "scope": self.scope,
+            "window_days": self.window_days,
+        }
+        if self.downtime_hours is not None:
+            out["downtime_hours"] = list(self.downtime_hours)
+        return out
+
+
+@dataclass(frozen=True)
+class MaintenancePolicy:
+    """The background-work policy for one corpus (reindex + synopsis)."""
+
+    reindex: ReindexPolicy = field(default_factory=ReindexPolicy)
+    synopsis: SynopsisPolicy = field(default_factory=SynopsisPolicy)
+
+    @classmethod
+    def from_dict(cls, d: dict | None) -> "MaintenancePolicy":
+        d = d or {}
+        return cls(
+            reindex=ReindexPolicy.from_dict(d.get("reindex")),
+            synopsis=SynopsisPolicy.from_dict(d.get("synopsis")),
+        )
+
+    def to_dict(self) -> dict:
+        return {"reindex": self.reindex.to_dict(), "synopsis": self.synopsis.to_dict()}
+
+    def merged(self, override: dict | None) -> "MaintenancePolicy":
+        """Layer an ``override`` dict on top of this policy (entry over defaults)."""
+        if not override:
+            return self
+        return replace(
+            self,
+            reindex=ReindexPolicy.from_dict(
+                {**self.reindex.to_dict(), **(override.get("reindex") or {})}
+            ),
+            synopsis=SynopsisPolicy.from_dict(
+                {**self.synopsis.to_dict(), **(override.get("synopsis") or {})}
+            ),
+        )
+
+
+# --------------------------------------------------------------------------- #
+# Smart defaults per kind — the "rules that lead to defaults" for new corpora
+# --------------------------------------------------------------------------- #
+
+#: The global fallback policy (any kind without a specific default).
+GLOBAL_DEFAULT = MaintenancePolicy()
+
+#: Per-kind maintenance defaults. The rule of thumb encoded here:
+#: *small, fully-enumerable* corpora (skills/packages/reports/files) rebuild on
+#: source change (cheap, exact); *large, append-mostly, time-stamped* corpora
+#: (sessions) rebuild on an interval and keep synopsis off by default, with a
+#: downtime window ready for when it is turned on. New kinds register here.
+DEFAULTS_BY_KIND: dict[str, dict] = {
+    "skills": {"reindex": {"on": "source-change"}},
+    "packages": {"reindex": {"on": "source-change"}},
+    "reports": {"reindex": {"on": "source-change"}},
+    "files": {"reindex": {"on": "source-change"}},
+    "sessions": {
+        "reindex": {"on": "interval", "every_hours": 24},
+        "synopsis": {
+            "enabled": False,
+            "scope": "recent",
+            "window_days": 30,
+            "downtime_hours": [2, 6],
+        },
+    },
+}
+
+
+def default_policy_for_kind(kind: str) -> MaintenancePolicy:
+    """The smart-default :class:`MaintenancePolicy` for a corpus ``kind``."""
+    return GLOBAL_DEFAULT.merged(DEFAULTS_BY_KIND.get(kind))
+
+
+def resolve_policy(entry: dict | None) -> MaintenancePolicy:
+    """The effective policy for a registry ``entry``: entry over kind over global.
+
+    A v1 entry (no ``maintenance`` key) resolves to its kind's smart default, so
+    existing corpora gain a sensible policy without a migration.
+    """
+    entry = entry or {}
+    base = default_policy_for_kind(entry.get("kind", ""))
+    return base.merged(entry.get("maintenance"))
+
+
+def resolve_storage(entry: dict | None) -> dict:
+    """The effective storage spec for an ``entry`` (default ``{"backend": "local"}``)."""
+    storage = dict((entry or {}).get("storage") or {})
+    backend = storage.get("backend", "local")
+    if backend not in STORAGE_BACKENDS:
+        raise NotImplementedError(
+            f"storage backend {backend!r} is not available yet (only "
+            f"{STORAGE_BACKENDS}); a vd.Collection backend is tracked in issue #28."
+        )
+    storage["backend"] = backend
+    return storage
+
+
+# --------------------------------------------------------------------------- #
+# Timing predicates (pure; ``now`` is injected for testability)
+# --------------------------------------------------------------------------- #
+
+
+def is_reindex_due(
+    policy: MaintenancePolicy,
+    last_maintained: datetime | None,
+    now: datetime,
+) -> bool:
+    """Whether a reindex is due under ``policy`` given the last-maintained time.
+
+    - ``source-change`` is always due (the build is incremental and a no-op when
+      nothing changed, so running it cannot do harm).
+    - ``interval`` is due when never maintained or older than ``every_hours``.
+    - ``manual`` is never automatically due.
+    """
+    on = policy.reindex.on
+    if on == "source-change":
+        return True
+    if on == "manual":
+        return False
+    # interval
+    every = policy.reindex.every_hours
+    if not every or last_maintained is None:
+        return True
+    return now - last_maintained >= timedelta(hours=every)
+
+
+def in_downtime(policy: MaintenancePolicy, now: datetime) -> bool:
+    """Whether ``now`` falls inside the synopsis ``downtime_hours`` window.
+
+    ``None`` window means "any time". A window whose start hour is greater than
+    its end hour wraps past midnight (e.g. ``(22, 6)`` is 22:00–06:00).
+    """
+    dh = policy.synopsis.downtime_hours
+    if not dh:
+        return True
+    start, end = dh
+    hour = now.hour
+    if start <= end:
+        return start <= hour < end
+    return hour >= start or hour < end  # wraps midnight
diff --git a/ir/registry.py b/ir/registry.py
index 559ade6..2bf3f2d 100644
--- a/ir/registry.py
+++ b/ir/registry.py
@@ -10,10 +10,11 @@
 - ``skills``   → :meth:`CorpusSource.from_skills`
 - ``packages`` → :meth:`CorpusSource.from_packages`
 - ``reports``  → :meth:`CorpusSource.from_md_reports`
+- ``sessions`` → :meth:`CorpusSource.from_claude_sessions`
 - ``files``    → :meth:`CorpusSource.from_files` (needs ``root``; optional
   ``pattern``)
 
-Unregistered preset names (``skills``/``packages``/``reports``) are
+Unregistered preset names (``skills``/``packages``/``reports``/``sessions``) are
 auto-registered with defaults on first use, so ``ir build skills`` just works.
 """
 
@@ -26,7 +27,7 @@
 from .config import registry_path
 from .sources import CorpusSource
 
-PRESETS = ("skills", "packages", "reports")
+PRESETS = ("skills", "packages", "reports", "sessions")
 
 
 def _load() -> dict[str, Any]:
@@ -40,14 +41,58 @@ def _save(entries: dict[str, Any]) -> None:
     registry_path().write_text(json.dumps(entries, indent=2), encoding="utf-8")
 
 
-def register(name: str, kind: str, *, embedder: str = "default", **params) -> dict:
-    """Register (or overwrite) a named corpus definition."""
+def register(
+    name: str,
+    kind: str,
+    *,
+    embedder: str = "default",
+    strategy: Any = None,
+    maintenance: Mapping[str, Any] | None = None,
+    storage: Mapping[str, Any] | None = None,
+    **params,
+) -> dict:
+    """Register (or overwrite) a named corpus definition.
+
+    Beyond the v1 ``kind`` / ``embedder`` / ``params``, an entry may now carry
+    (all optional, with smart per-kind defaults applied at resolution time — see
+    :mod:`ir.policy`):
+
+    - ``strategy`` — an :class:`~ir.strategy.IndexingStrategy` (or a
+      ``{"name", "params"}`` spec) persisted so the corpus's *segmentation* is
+      stable across rebuilds. ``None`` keeps the preset's default strategy.
+    - ``maintenance`` — the background-work policy dict (``reindex`` / ``synopsis``;
+      validated here, see :class:`ir.policy.MaintenancePolicy`).
+    - ``storage`` — the persistence backend (default ``{"backend": "local"}``).
+
+    Entries written by older ``ir`` (none of these keys) keep working unchanged.
+    """
     if kind not in PRESETS and kind != "files":
         raise ValueError(
             f"Unknown corpus kind {kind!r}; use one of {PRESETS} or 'files'."
         )
+    from .policy import MaintenancePolicy, resolve_storage
+    from .strategy import IndexingStrategy, strategy_to_spec
+
+    entry: dict[str, Any] = {"kind": kind, "embedder": embedder, "params": params}
+    if strategy is not None:
+        if isinstance(strategy, Mapping):
+            entry["strategy"] = dict(strategy)
+        elif isinstance(strategy, IndexingStrategy):
+            entry["strategy"] = strategy_to_spec(strategy)
+        else:
+            raise TypeError(
+                f"strategy must be an IndexingStrategy or a spec dict, got "
+                f"{type(strategy).__name__}"
+            )
+    if maintenance is not None:
+        # Validate (raises on a bad trigger) and store the normalized form.
+        entry["maintenance"] = MaintenancePolicy.from_dict(dict(maintenance)).to_dict()
+    if storage is not None:
+        resolve_storage({"storage": dict(storage)})  # validate backend
+        entry["storage"] = dict(storage)
+
     entries = _load()
-    entries[name] = {"kind": kind, "embedder": embedder, "params": params}
+    entries[name] = entry
     _save(entries)
     return entries[name]
 
@@ -70,22 +115,53 @@ def unregister(name: str) -> None:
 
 
 def source_from_entry(name: str, entry: dict) -> CorpusSource:
-    """Reconstruct a :class:`CorpusSource` from a registry entry."""
+    """Reconstruct a :class:`CorpusSource` from a registry entry.
+
+    A persisted ``strategy`` spec (registry v2) is reconstructed and passed to
+    the preset constructor, so a corpus's segmentation survives across rebuilds.
+    A v1 entry (no ``strategy``) passes ``strategy=None`` and keeps the preset's
+    default — unchanged behavior.
+    """
+    from .strategy import strategy_from_spec
+
     kind = entry["kind"]
     params = dict(entry.get("params", {}))
     embedder = entry.get("embedder", "default")
+    strategy = strategy_from_spec(entry.get("strategy"))
     if kind == "skills":
-        return CorpusSource.from_skills(name=name, embedder=embedder)
+        return CorpusSource.from_skills(name=name, embedder=embedder, strategy=strategy)
     if kind == "packages":
-        return CorpusSource.from_packages(name=name, embedder=embedder)
+        return CorpusSource.from_packages(
+            name=name, embedder=embedder, strategy=strategy
+        )
     if kind == "reports":
-        return CorpusSource.from_md_reports(name=name, embedder=embedder)
+        return CorpusSource.from_md_reports(
+            name=name, embedder=embedder, strategy=strategy
+        )
+    if kind == "sessions":
+        return CorpusSource.from_claude_sessions(
+            name=name, embedder=embedder, strategy=strategy, **params
+        )
     if kind == "files":
         root = params.pop("root")
-        return CorpusSource.from_files(root, name=name, embedder=embedder, **params)
+        return CorpusSource.from_files(
+            root, name=name, embedder=embedder, strategy=strategy, **params
+        )
     raise ValueError(f"Unknown corpus kind {kind!r}.")
 
 
+def policy_for(name: str):
+    """The effective :class:`ir.policy.MaintenancePolicy` for corpus *name*.
+
+    Resolves the registered entry's ``maintenance`` over its kind's smart default
+    over the global default (see :func:`ir.policy.resolve_policy`). An unregistered
+    name resolves to the global default policy.
+    """
+    from .policy import resolve_policy
+
+    return resolve_policy(get(name))
+
+
 def source_for(name: str) -> CorpusSource:
     """Resolve *name* to a source, auto-registering a preset if needed."""
     entry = get(name)
diff --git a/ir/retrieve.py b/ir/retrieve.py
index 3b8c7a4..6af1484 100644
--- a/ir/retrieve.py
+++ b/ir/retrieve.py
@@ -414,7 +414,13 @@ def search(
             return merged[:k]
         query = queries[0]
 
-    ids, mat, metas = corpus.store.matrix()
+    # Lexical ranking scores on text alone, so it must not pay the embedding
+    # matrix's I/O; dense/hybrid need the matrix. (Both reuse the same cache.)
+    if mode == "lexical":
+        ids, metas = corpus.store.metas()
+        mat = None
+    else:
+        ids, mat, metas = corpus.store.matrix()
     if not ids:
         return []
 
diff --git a/ir/select.py b/ir/select.py
index fd25976..f686868 100644
--- a/ir/select.py
+++ b/ir/select.py
@@ -444,7 +444,7 @@ def make_llm_selector(
     The model reads the candidates' descriptions and commits to a subset.
     ``chooser`` is an injectable ``(query, candidates) -> [id, …]`` callable
     (a test double, or your own router); when omitted it is built lazily on
-    :mod:`oa` (``oa.prompt_function``), so importing this module stays offline.
+    :mod:`aix` (``aix.prompt_func``), so importing this module stays offline.
 
     Robustness: any error or empty/garbled reply falls back to ``fallback``
     (default: the ``"conservative"`` heuristic), because LLM selection is known
@@ -490,13 +490,13 @@ def selector(hits: Sequence[SearchHit]) -> list[SearchHit]:
 
 
 def _default_llm_chooser(prompt: str, **prompt_function_kwargs: Any):
-    """Build the default LLM chooser on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default LLM chooser on :mod:`aix` (lazy import)."""
+    import aix
 
     def _parse_ids(text: str) -> list[str]:
         return [line.strip(" -\t") for line in str(text).splitlines() if line.strip()]
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_ids, name="select_capabilities", **prompt_function_kwargs
     )
 
diff --git a/ir/sources.py b/ir/sources.py
index dc63115..65db925 100644
--- a/ir/sources.py
+++ b/ir/sources.py
@@ -26,10 +26,20 @@
 from pathlib import Path
 from typing import Any
 
-from .strategy import Chunked, IndexingStrategy, Package, Skill, WholeText
+from .strategy import (
+    Chunked,
+    ClaudeTurn,
+    IndexingStrategy,
+    Package,
+    Skill,
+    WholeText,
+)
 
 ALLCAPS_MD = re.compile(r"^[A-Z0-9_ ]+\.md$")
 
+#: Default look-back window (days) for :meth:`CorpusSource.from_claude_sessions`.
+DFLT_SESSIONS_SINCE_DAYS = 90
+
 
 def content_hash_signal(artifact_id: str, raw: Any) -> str:
     """Default change signal: a content hash of the raw payload."""
@@ -193,6 +203,67 @@ def metadata_of(aid, raw):
             **kwargs,
         )
 
+    @classmethod
+    def from_claude_sessions(
+        cls,
+        *,
+        name: str = "sessions",
+        since: float | None = DFLT_SESSIONS_SINCE_DAYS,
+        projects: Any = None,
+        include_full: bool = False,
+        include_session_title: bool = True,
+        max_sessions: int | None = None,
+        root: str | Path | None = None,
+        fetcher: Callable[[], list] | None = None,
+        strategy: IndexingStrategy | None = None,
+        **kwargs,
+    ) -> "CorpusSource":
+        """The user's Claude Code session transcripts as a corpus (turn pairs).
+
+        Each artifact is one user→assistant turn pair; the default
+        :class:`~ir.strategy.ClaudeTurn` strategy indexes the user prompt and the
+        assistant's end-of-turn summary as separate surfaces (target either with
+        ``surfaces={"user_prompt"}`` / ``{"assistant_summary"}``). ``include_full``
+        adds the full assistant text surface (off by default — the summary is the
+        signal). ``include_session_title`` (default on) also indexes one record per
+        session whose surface is the session's persisted custom/AI title — a cheap
+        "what was this session about" surface. Scope defaults to the last ``since``
+        days (a full-history build is heavy); narrow with ``projects`` (a cwd
+        substring or list) and ``max_sessions``.
+
+        ``fetcher`` overrides the record source (each a mapping with
+        ``user_prompt`` / ``assistant_summary`` / ... ) — inject a test double to
+        avoid the ``priv`` dependency. Otherwise records come from
+        :func:`priv.claude_transcripts.turn_pair_records`.
+        """
+        if fetcher is not None:
+            records = list(fetcher())
+        else:
+            from priv.claude_transcripts import turn_pair_records
+
+            records = list(
+                turn_pair_records(
+                    root=root,
+                    since=since,
+                    projects=projects,
+                    max_sessions=max_sessions,
+                    include_session_title=include_session_title,
+                )
+            )
+        # Collision-safe scope: ids are session_id:user_uuid (already unique); a
+        # rare missing id falls back to enumeration so no pair is silently dropped.
+        scope: dict[str, dict] = {}
+        for i, r in enumerate(records):
+            key = r.get("id") or f"turn_{i}"
+            scope[key] = r
+
+        return cls(
+            name=name,
+            scope=scope,
+            indexing_strategy=strategy or ClaudeTurn(include_full=include_full),
+            **kwargs,
+        )
+
     @classmethod
     def from_packages(
         cls,
diff --git a/ir/store.py b/ir/store.py
index e7a72d5..d8cee0c 100644
--- a/ir/store.py
+++ b/ir/store.py
@@ -30,14 +30,20 @@
 
 import copy
 import io
+import json
 import os
 from collections.abc import Iterator, Mapping, MutableMapping
+from pathlib import Path
 from typing import Any
 
 import numpy as np
 
 from .base import Record
 
+#: Bump when the on-disk packed-matrix layout changes so a stale cache from an
+#: older ``ir`` is treated as invalid (rebuilt) rather than mis-read.
+_PACKED_FORMAT = 1
+
 
 def _ndarray_store(rootdir) -> MutableMapping[str, np.ndarray]:
     """A ``dol`` file store whose values are float32 ``ndarray``s."""
@@ -78,6 +84,8 @@ def __init__(
         config: MutableMapping[str, Any],
         calibration: MutableMapping[str, Any] | None = None,
         links: MutableMapping[str, Any] | None = None,
+        *,
+        packed_dir: str | Path | None = None,
     ):
         self.meta = meta
         self.vectors = vectors
@@ -89,6 +97,13 @@ def __init__(
         self.calibration = {} if calibration is None else calibration
         self.links = {} if links is None else links
         self._matrix_cache: tuple | None = None
+        # Optional on-disk packed-matrix cache (a single normalized matrix + its
+        # ids/metas as three files), so reopening a corpus skips the per-record
+        # vector-file storm. ``None`` (e.g. for the in-memory store) keeps the
+        # matrix purely in-process, preserving the original behavior. The cache
+        # is a *write-invalidated read cache*: any record write clears it.
+        self._packed_dir = Path(packed_dir) if packed_dir is not None else None
+        self._packed_stale = False
 
     # ----- factories ------------------------------------------------------ #
 
@@ -105,6 +120,7 @@ def local(cls, name: str) -> "CorpusStore":
             config=_json_store(root / "config"),
             calibration=_json_store(root / "calibration"),
             links=_json_store(root / "links"),
+            packed_dir=root / "matrix",
         )
 
     @classmethod
@@ -124,7 +140,7 @@ def put_record(self, record: Record) -> None:
             "metadata": dict(record.metadata),
         }
         self.vectors[record.id] = np.asarray(record.vector, dtype=np.float32)
-        self._matrix_cache = None
+        self._invalidate_matrix()
 
     def delete_record(self, record_id: str) -> None:
         """Remove a record's metadata + vector; a missing id is tolerated."""
@@ -133,7 +149,7 @@ def delete_record(self, record_id: str) -> None:
             del self.vectors[record_id]
         except KeyError:
             pass
-        self._matrix_cache = None
+        self._invalidate_matrix()
 
     def record_ids(self) -> Iterator[str]:
         """Iterate the record ids currently stored."""
@@ -185,6 +201,19 @@ def set_config(self, settings: Mapping[str, Any]) -> None:
         """Persist the corpus build *settings* (name / embedder spec + id)."""
         self.config["config"] = dict(settings)
 
+    def get_maintenance_state(self) -> dict:
+        """Background-work bookkeeping (e.g. ``last_maintained``); ``{}`` if unset.
+
+        Kept under a separate ``config``-view key from the build settings: it is
+        regenerable scheduler state (when ``ir maintain`` last ran), not part of
+        the corpus's build identity, so it must never clobber it.
+        """
+        return dict(self.config.get("maintenance", {}))
+
+    def set_maintenance_state(self, state: Mapping[str, Any]) -> None:
+        """Persist the maintenance bookkeeping for this corpus."""
+        self.config["maintenance"] = dict(state)
+
     # ----- calibration (per-mode) ----------------------------------------- #
 
     def get_calibration(self, mode: str) -> dict | None:
@@ -251,19 +280,144 @@ def matrix(self) -> tuple[list[str], np.ndarray, list[dict]]:
         """Return ``(record_ids, normalized_matrix, metas)`` for brute force.
 
         Rows are L2-normalized so cosine similarity is a dot product. Empty
-        corpora return a ``(0, 0)`` matrix. Cached until the next write.
+        corpora return a ``(0, 0)`` matrix.
+
+        Caching is two-tier: an in-process cache (invalidated on the next write)
+        backed, for file-rooted stores, by an on-disk **packed** cache — one
+        normalized-matrix ``.npy`` plus its ids/metas, written once and reloaded
+        with a single memory-mapped read. The packed cache turns a cold reopen
+        from a per-record vector-file storm (thousands of tiny reads) into three
+        file reads; it is cleared by any record write, so it never goes stale.
         """
         if self._matrix_cache is not None:
             return self._matrix_cache
+        packed = self._load_packed()
+        if packed is not None:
+            self._matrix_cache = packed
+            return packed
+        result = self._build_matrix()
+        self._save_packed(result)
+        self._matrix_cache = result
+        return result
+
+    def metas(self) -> tuple[list[str], list[dict]]:
+        """Return ``(record_ids, metas)`` **without** loading any vectors.
+
+        The vector-free counterpart of :meth:`matrix`, for ranking modes that
+        score on text alone (``mode="lexical"``): they need candidate metadata
+        (text + filter fields) but never the embedding matrix, so they must not
+        pay its I/O. Reuses the in-process or packed cache when present; else
+        reads only the ``meta`` view (not ``vectors``).
+        """
+        if self._matrix_cache is not None:
+            ids, _mat, metas = self._matrix_cache
+            return ids, metas
+        packed = self._load_packed()
+        if packed is not None:
+            self._matrix_cache = packed
+            return packed[0], packed[2]
+        ids = list(self.meta)
+        metas = [self.meta[rid] for rid in ids]
+        return ids, metas
+
+    def _build_matrix(self) -> tuple[list[str], np.ndarray, list[dict]]:
+        """Build ``(ids, normalized_matrix, metas)`` from the per-record stores.
+
+        One pass over the ids reads each record's meta and vector together
+        (the previous implementation iterated the meta view three times).
+        """
         ids = list(self.meta)
         if not ids:
-            self._matrix_cache = ([], np.zeros((0, 0), dtype=np.float32), [])
-            return self._matrix_cache
-        rows = [np.asarray(self.vectors[rid], dtype=np.float32) for rid in ids]
+            return ([], np.zeros((0, 0), dtype=np.float32), [])
+        metas: list[dict] = []
+        rows: list[np.ndarray] = []
+        for rid in ids:
+            metas.append(self.meta[rid])
+            rows.append(np.asarray(self.vectors[rid], dtype=np.float32))
         mat = np.vstack(rows)
         norms = np.linalg.norm(mat, axis=1, keepdims=True)
         norms[norms == 0] = 1.0
         mat = mat / norms
-        metas = [self.meta[rid] for rid in ids]
-        self._matrix_cache = (ids, mat, metas)
-        return self._matrix_cache
+        return (ids, mat, metas)
+
+    # ----- packed-matrix disk cache --------------------------------------- #
+
+    def _invalidate_matrix(self) -> None:
+        """Drop the in-process matrix and clear the on-disk packed cache once.
+
+        Called on every record write. The on-disk clear happens at most once per
+        rebuild (guarded by ``_packed_stale``) so a bulk build's thousands of
+        ``put_record`` calls don't each touch the filesystem.
+        """
+        self._matrix_cache = None
+        if self._packed_dir is not None and not self._packed_stale:
+            self._clear_packed()
+            self._packed_stale = True
+
+    def _packed_paths(self):
+        d = self._packed_dir
+        # ``sig`` is written last and removed first, so a half-written or
+        # half-cleared cache (no/sig-less dir) always reads as invalid.
+        return {
+            "sig": d / "sig.json",
+            "matrix": d / "matrix.npy",
+            "ids": d / "ids.json",
+            "metas": d / "metas.json",
+        }
+
+    def _clear_packed(self) -> None:
+        if self._packed_dir is None:
+            return
+        paths = self._packed_paths()
+        for key in ("sig", "matrix", "ids", "metas"):  # sig first
+            try:
+                paths[key].unlink()
+            except OSError:
+                pass
+
+    def _load_packed(self):
+        """Load ``(ids, mmap_matrix, metas)`` from the packed cache, or ``None``."""
+        if self._packed_dir is None:
+            return None
+        paths = self._packed_paths()
+        if not paths["sig"].exists():
+            return None
+        try:
+            sig = json.loads(paths["sig"].read_text(encoding="utf-8"))
+            if sig.get("format") != _PACKED_FORMAT:
+                return None
+            mat = np.load(paths["matrix"], mmap_mode="r")
+            ids = json.loads(paths["ids"].read_text(encoding="utf-8"))
+            metas = json.loads(paths["metas"].read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            return None
+        if len(ids) != mat.shape[0] or len(metas) != len(ids):
+            return None
+        return (ids, mat, metas)
+
+    def _save_packed(self, result: tuple[list[str], np.ndarray, list[dict]]) -> None:
+        """Persist a freshly built matrix to the packed cache (best-effort).
+
+        Skips empty corpora. Writes ``sig.json`` last so a crash mid-write
+        leaves the cache marked invalid (no sig) rather than torn.
+        """
+        if self._packed_dir is None:
+            return
+        ids, mat, metas = result
+        if not ids:
+            return
+        try:
+            self._packed_dir.mkdir(parents=True, exist_ok=True)
+            paths = self._packed_paths()
+            np.save(paths["matrix"], np.asarray(mat, dtype=np.float32))
+            paths["ids"].write_text(json.dumps(ids), encoding="utf-8")
+            paths["metas"].write_text(json.dumps(metas), encoding="utf-8")
+            paths["sig"].write_text(
+                json.dumps({"format": _PACKED_FORMAT, "count": len(ids)}),
+                encoding="utf-8",
+            )
+            self._packed_stale = False
+        except OSError:
+            # A read cache that can't be written is non-fatal: fall back to the
+            # in-process cache (already set by the caller) for this process.
+            self._clear_packed()
diff --git a/ir/strategy.py b/ir/strategy.py
index c6bcdcc..62c327e 100644
--- a/ir/strategy.py
+++ b/ir/strategy.py
@@ -184,6 +184,53 @@ def decompose(self, artifact_id, raw, metadata=None) -> IndexPlan:
         return IndexPlan(filter_fields=filter_fields, surfaces=surfaces)
 
 
+#: Shipped strategies addressable by name, for persisting an
+#: :class:`IndexingStrategy` in a registry entry and reconstructing it (#58).
+#: New shipped strategies register here so a corpus can name its segmentation.
+STRATEGY_REGISTRY: dict[str, type] = {
+    "WholeText": WholeText,
+    "Chunked": Chunked,
+    "Skill": Skill,
+}
+
+
+def strategy_to_spec(strategy: Any) -> dict:
+    """A ``{"name", "params"}`` spec for a shipped, scalar-param strategy.
+
+    Captures only scalar constructor parameters (the same identity surface
+    :func:`ir.index._strategy_id` stamps), so it round-trips the shipped
+    strategies. A custom strategy, or one wrapping another (e.g. the
+    :func:`ir.with_synopsis` wrapper), is **not** captured here — those are set
+    programmatically at build time / by the maintenance layer, not persisted as a
+    segmentation spec.
+    """
+    name = type(strategy).__name__
+    params = {
+        k: v
+        for k, v in vars(strategy).items()
+        if isinstance(v, (str, int, float, bool, type(None)))
+    }
+    return {"name": name, "params": params}
+
+
+def strategy_from_spec(spec: Mapping[str, Any] | None) -> "IndexingStrategy | None":
+    """Reconstruct a shipped strategy from a ``{"name", "params"}`` spec.
+
+    ``None`` (no persisted strategy) returns ``None`` so the caller falls back to
+    the source preset's default strategy — the back-compatible behavior for v1
+    registry entries.
+    """
+    if not spec:
+        return None
+    name = spec.get("name")
+    cls = STRATEGY_REGISTRY.get(name)
+    if cls is None:
+        raise ValueError(
+            f"unknown strategy {name!r}; known: {sorted(STRATEGY_REGISTRY)}"
+        )
+    return cls(**dict(spec.get("params") or {}))
+
+
 class Package:
     """Package strategy: ``name + description`` surface plus README chunks.
 
@@ -242,3 +289,74 @@ def decompose(self, artifact_id, raw, metadata=None) -> IndexPlan:
         # Drop empty surfaces (e.g. no description and no README).
         surfaces = [s for s in surfaces if s.text.strip()]
         return IndexPlan(filter_fields=filter_fields, surfaces=surfaces)
+
+
+class ClaudeTurn:
+    """Index a Claude Code session turn-pair: user prompt + assistant summary.
+
+    Two surfaces by default — ``user_prompt`` (what the human asked) and
+    ``assistant_summary`` (the assistant's *final* end-of-turn text, the
+    highest-signal "here's what I did"; the deliberation before it is mostly
+    noise) — so a query can target either side via ``surfaces={"user_prompt"}`` /
+    ``{"assistant_summary"}``. With ``include_full=True`` a third
+    ``assistant_full`` surface (all the turn's assistant natural-language text)
+    trades noise for recall — off by default. Session / project / time / model /
+    tool-use become hard-filter fields.
+
+    The raw artifact is a turn-pair record (see
+    :func:`priv.claude_transcripts.turn_pair_records`): a mapping with
+    ``user_prompt`` / ``assistant_summary`` / ``assistant_full`` plus metadata.
+    """
+
+    def __init__(self, *, include_full: bool = False):
+        self.include_full = include_full
+
+    def decompose(self, artifact_id, raw, metadata=None) -> IndexPlan:
+        meta = dict(metadata or {})
+        raw = raw if isinstance(raw, Mapping) else {}
+        user = str(raw.get("user_prompt", "") or "").strip()
+        summary = str(raw.get("assistant_summary", "") or "").strip()
+        full = str(raw.get("assistant_full", "") or "").strip()
+        title = str(raw.get("session_title", "") or "").strip()
+        filter_fields = {
+            "session_id": raw.get("session_id"),
+            "project": raw.get("project"),
+            "cwd": raw.get("cwd"),
+            "git_branch": raw.get("git_branch"),
+            "timestamp": raw.get("timestamp"),
+            "model": raw.get("model"),
+            "has_tool_use": bool(raw.get("has_tool_use")),
+            "session_title": raw.get("session_title"),
+            **meta,
+        }
+        surfaces = []
+        # A dedicated per-session title record indexes just the title (a cheap,
+        # searchable "what was this session about" surface).
+        if raw.get("record_type") == "session_title":
+            if title:
+                surfaces.append(
+                    Surface(artifact_id, "session_title", title, granularity="field")
+                )
+            return IndexPlan(filter_fields=filter_fields, surfaces=surfaces)
+        if user:
+            surfaces.append(
+                Surface(artifact_id, "user_prompt", user, granularity="field")
+            )
+        if summary:
+            surfaces.append(
+                Surface(
+                    artifact_id, "assistant_summary", summary, granularity="field"
+                )
+            )
+        if self.include_full and full and full != summary:
+            surfaces.append(
+                Surface(
+                    artifact_id, "assistant_full", full, granularity="document"
+                )
+            )
+        return IndexPlan(filter_fields=filter_fields, surfaces=surfaces)
+
+
+# Register the strategies defined after STRATEGY_REGISTRY's literal above.
+STRATEGY_REGISTRY["Package"] = Package
+STRATEGY_REGISTRY["ClaudeTurn"] = ClaudeTurn
diff --git a/ir/synopsis.py b/ir/synopsis.py
index c325b18..c6ca6ee 100644
--- a/ir/synopsis.py
+++ b/ir/synopsis.py
@@ -19,9 +19,10 @@
 a synth that returns ``""`` simply leaves the artifact with its other surfaces.
 
 ``synthesize: Callable[[Artifact], str]`` is **injectable** (a test double, or
-your own summarizer); omitted, it is built lazily on :mod:`oa` via
-:func:`make_llm_synthesizer` (the ``make_llm_*`` idiom — ``import ir`` stays
-offline, ``oa`` is imported only on the first synthesis).
+your own summarizer); omitted, it is built lazily on :mod:`aix` (the
+multi-provider LLM facade) via :func:`make_llm_synthesizer` (the ``make_llm_*``
+idiom — ``import ir`` stays offline, ``aix`` is imported only on the first
+synthesis).
 
 **Staleness.** The wrapper exposes its identity as scalar attributes
 (``synthesizer_id``, ``synopsis_kind``) and holds the inner strategy, so
@@ -79,13 +80,13 @@ def _prompt_hash(prompt: str) -> str:
 def _default_llm_summarizer(
     prompt: str, model: str | None, **prompt_function_kwargs: Any
 ):
-    """Build the default text→synopsis summarizer on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default text→synopsis summarizer on :mod:`aix` (lazy import)."""
+    import aix
 
     kwargs = dict(prompt_function_kwargs)
     if model is not None:
         kwargs.setdefault("model", model)
-    fn = oa.prompt_function(prompt, name="synthesize_synopsis", **kwargs)
+    fn = aix.prompt_func(prompt, name="synthesize_synopsis", **kwargs)
 
     def summarize(text: str) -> str:
         return str(fn(text=text) or "").strip()
@@ -105,8 +106,8 @@ def make_llm_synthesizer(
     """An LLM-backed :data:`Synthesizer` (:class:`~ir.base.Artifact` → synopsis).
 
     ``summarize`` is an injectable ``text -> str`` callable (a test double, or
-    your own summarizer); when omitted it is built lazily on :mod:`oa`
-    (``oa.prompt_function``) on the **first** synthesis and reused — so importing
+    your own summarizer); when omitted it is built lazily on :mod:`aix`
+    (``aix.prompt_func``) on the **first** synthesis and reused — so importing
     this module, and even constructing the synthesizer, stays offline. The
     artifact's text is extracted with :func:`ir.strategy.text_of` using
     ``text_key`` — which :func:`with_synopsis` threads from the inner strategy, so
@@ -115,7 +116,7 @@ def make_llm_synthesizer(
     fabricated summary).
 
     The returned callable carries a ``synthesizer_id`` attribute (default
-    ``"oa:{model}:{sha(prompt)[:12]}"``) that :func:`with_synopsis` reads into the
+    ``"aix:{model}:{sha(prompt)[:12]}"``) that :func:`with_synopsis` reads into the
     corpus's ``strategy_id`` for staleness — a prompt or model change re-synthesizes.
     """
     cache: dict[str, Callable[[str], str]] = {}
@@ -140,7 +141,7 @@ def synthesize(artifact: Artifact) -> str:
         return out.strip() if isinstance(out, str) else ""
 
     synthesize.synthesizer_id = (
-        synthesizer_id or f"oa:{model or 'default'}:{_prompt_hash(prompt)}"
+        synthesizer_id or f"aix:{model or 'default'}:{_prompt_hash(prompt)}"
     )
     return synthesize
 
@@ -229,7 +230,7 @@ def with_synopsis(
         strategy: the inner :class:`~ir.strategy.IndexingStrategy` (``Chunked``,
             ``Package``, ...). Its surfaces are kept; the synopsis is prepended.
         synthesize: an injectable ``Artifact -> str`` (test double / custom
-            summarizer). Omitted → :func:`make_llm_synthesizer` (lazy ``oa``).
+            summarizer). Omitted → :func:`make_llm_synthesizer` (lazy ``aix``).
         synthesizer_id: explicit identity stamp for staleness (recommended when
             injecting an unnamed callable / lambda). Omitted → the synthesizer's
             own ``synthesizer_id`` / ``__qualname__``.
diff --git a/pyproject.toml b/pyproject.toml
index dbaf918..6935a40 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ docs = [
     "sphinx-rtd-theme>=1.0",
 ]
 llm = [
-    "oa",
+    "aix",
 ]
 
 [tool.ruff]
diff --git a/tests/test_eval_gen.py b/tests/test_eval_gen.py
index 0627ab4..cb0c3ad 100644
--- a/tests/test_eval_gen.py
+++ b/tests/test_eval_gen.py
@@ -2,7 +2,7 @@
 
 The LLM is injected (`query_generator` / `abstention_generator`), so masking, the
 leakage guard, gold assignment, the abstention fraction, and end-to-end
-scorability are all tested without `oa` or a model.
+scorability are all tested without `aix` or a model.
 """
 
 import warnings
@@ -372,31 +372,37 @@ def test_build_eval_set_forwards_mask_names_false():
 
 
 # --------------------------------------------------------------------------- #
-# Default oa-backed generators — prompt assembly (skipped if oa absent)
+# Default aix-backed generators — prompt assembly + egress wiring
+# (mock aix's chat so no LLM/network is touched; skipped if aix absent)
 # --------------------------------------------------------------------------- #
 
 
-def test_oa_prompts_substitute_placeholders():
-    oa = pytest.importorskip("oa")
-    # prompt-only (prompt_func=None): assert on the ASSEMBLED PROMPT, not the
-    # generate() wrapper (which, with prompt_func=None, would iterate the string).
-    bt = oa.prompt_function(eg.BACKTRANSLATION_PROMPT, name="bt", prompt_func=None)
-    prompt = bt(description="DESC_MARKER", n=4)
-    assert "DESC_MARKER" in prompt and "Write 4 natural" in prompt
-    ab = oa.prompt_function(eg.ABSTENTION_PROMPT, name="ab", prompt_func=None)
-    assert "THEME_MARKER" in ab(theme="THEME_MARKER", n=2)
+def test_default_generator_prompts_substitute_placeholders(monkeypatch):
+    aix_prompts = pytest.importorskip("aix.prompts")
+    captured = {}
 
+    def fake_chat(prompt, **kwargs):
+        captured["prompt"] = prompt
+        return "q1\nq2\nq3\nq4"
 
-def test_parse_lines_is_wired_as_egress():
-    oa = pytest.importorskip("oa")
+    monkeypatch.setattr(aix_prompts, "chat", fake_chat)
+    gen = eg.make_default_query_generator()
+    out = gen("DESC_MARKER", n=4)
+    assert "DESC_MARKER" in captured["prompt"] and "Write 4 natural" in captured["prompt"]
+    assert out == ["q1", "q2", "q3", "q4"]
 
-    def fake_llm(*args, **kwargs):
-        return "1. foo\n- bar"
+    monkeypatch.setattr(aix_prompts, "chat", lambda p, **k: captured.update(prompt=p) or "t")
+    eg.make_default_abstention_generator()(n=2, theme="THEME_MARKER")
+    assert "THEME_MARKER" in captured["prompt"]
 
-    fn = oa.prompt_function(
-        "x {description} {n}", egress=eg._parse_lines, prompt_func=fake_llm
-    )
-    assert fn(description="d", n=2) == ["foo", "bar"]
+
+def test_parse_lines_is_wired_as_egress(monkeypatch):
+    aix_prompts = pytest.importorskip("aix.prompts")
+    # The default generator's egress (_parse_lines) strips list markers like
+    # "1. " / "- " from the LLM text.
+    monkeypatch.setattr(aix_prompts, "chat", lambda p, **k: "1. foo\n- bar")
+    gen = eg.make_default_query_generator()
+    assert gen("d", n=2) == ["foo", "bar"]
 
 
 # --------------------------------------------------------------------------- #
diff --git a/tests/test_formulate.py b/tests/test_formulate.py
index e91772a..d35feb6 100644
--- a/tests/test_formulate.py
+++ b/tests/test_formulate.py
@@ -2,7 +2,7 @@
 
 Identity by default (search unchanged); a single rewrite redirects retrieval;
 multi-query fan-out unions+fuses; the LLM formulator is injectable and falls back
-to identity on any failure. Hermetic: the light embedder, no ``oa``.
+to identity on any failure. Hermetic: the light embedder, no ``aix``.
 """
 
 import ir
@@ -94,8 +94,8 @@ def test_make_llm_formulator_falls_back_on_empty_reply():
     assert make_llm_formulator(rewriter=lambda q: [])("deploy") == "deploy"
 
 
-def test_make_llm_formulator_with_injected_rewriter_needs_no_oa():
-    # The injected-rewriter path must not touch the lazy oa builder; a single-str
+def test_make_llm_formulator_with_injected_rewriter_needs_no_aix():
+    # The injected-rewriter path must not touch the lazy aix builder; a single-str
     # rewrite is normalized to a one-element list (a valid Formulator output).
     f = make_llm_formulator(rewriter=lambda q: q)
     assert f("anything") == ["anything"]
diff --git a/tests/test_lazy_open.py b/tests/test_lazy_open.py
new file mode 100644
index 0000000..b3c0962
--- /dev/null
+++ b/tests/test_lazy_open.py
@@ -0,0 +1,71 @@
+"""``open_corpus`` defers the embedding-model load until a query embeds (#56).
+
+The model load is the dominant per-process cost; ``ls`` / ``info`` and
+lexical-only search never embed, so they must not pay it. A corpus knows its
+``embedder_id`` from stored config immediately, but resolves the model lazily.
+"""
+
+import numpy as np
+
+import ir
+import ir.index as idx
+from ir.index import _LazyEmbedder
+
+
+def test_lazy_embedder_resolves_only_on_first_call(monkeypatch):
+    calls = []
+
+    def fake_make_embedder(spec, **kw):
+        calls.append(spec)
+
+        def emb(texts, **kwargs):
+            return np.ones((len(list(texts)), 4), dtype=np.float32)
+
+        return emb, "fake-id"
+
+    monkeypatch.setattr(idx, "make_embedder", fake_make_embedder)
+
+    lazy = _LazyEmbedder("default")
+    assert calls == []  # constructing does not resolve the model
+
+    out = lazy(["x"])  # first call resolves
+    assert calls == ["default"]
+    assert out.shape == (1, 4)
+
+    lazy(["y"])  # cached: no second resolve
+    assert calls == ["default"]
+    assert lazy.embedder_id == "fake-id"
+
+
+def test_open_corpus_does_not_load_model_for_len_or_lexical(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_DATA_DIR", str(tmp_path / "data"))
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "config"))
+    monkeypatch.setenv("IR_CACHE_DIR", str(tmp_path / "cache"))
+
+    src = ir.CorpusSource.from_mapping(
+        {"a": "alpha apple avocado", "b": "beta banana blueberry"},
+        name="lz",
+        strategy=ir.WholeText(),
+    )
+    ir.build(src, embedder="light")  # builds into the tmp data dir
+
+    # Count model resolutions from here on (build's own load is already done).
+    real = idx.make_embedder
+    calls = []
+
+    def counting(spec, **kw):
+        calls.append(spec)
+        return real(spec, **kw)
+
+    monkeypatch.setattr(idx, "make_embedder", counting)
+
+    corpus = ir.open_corpus("lz")
+    assert corpus.embedder_id  # known from config without resolving the model
+    assert len(corpus) == 2
+    assert calls == []  # opening + len(): no model load
+
+    corpus.search("alpha", mode="lexical")
+    assert calls == []  # lexical ranks on text alone: still no model load
+
+    corpus.search("alpha", mode="dense")
+    assert len(calls) == 1  # dense embeds the query: resolves exactly once
diff --git a/tests/test_policy.py b/tests/test_policy.py
new file mode 100644
index 0000000..c982e07
--- /dev/null
+++ b/tests/test_policy.py
@@ -0,0 +1,200 @@
+"""Per-corpus policy + registry v2 strategy persistence + idempotent maintain (#58)."""
+
+from datetime import datetime, timedelta
+
+import pytest
+
+import ir
+from ir import policy as P
+from ir.policy import MaintenancePolicy, in_downtime, is_reindex_due, resolve_policy
+from ir.strategy import Chunked, strategy_from_spec, strategy_to_spec
+
+# --------------------------------------------------------------------------- #
+# Strategy spec round-trip (the v1 gap: registry could not persist a strategy)
+# --------------------------------------------------------------------------- #
+
+
+def test_strategy_spec_roundtrip():
+    spec = strategy_to_spec(Chunked(chunk_size=800, overlap=100))
+    assert spec["name"] == "Chunked"
+    assert spec["params"]["chunk_size"] == 800
+    s2 = strategy_from_spec(spec)
+    assert isinstance(s2, Chunked) and s2.chunk_size == 800 and s2.overlap == 100
+
+
+def test_strategy_from_spec_none_is_none():
+    assert strategy_from_spec(None) is None
+
+
+def test_strategy_from_spec_unknown_raises():
+    with pytest.raises(ValueError, match="unknown strategy"):
+        strategy_from_spec({"name": "Nope", "params": {}})
+
+
+# --------------------------------------------------------------------------- #
+# Policy resolution + smart per-kind defaults
+# --------------------------------------------------------------------------- #
+
+
+def test_sessions_kind_default_is_interval_with_downtime():
+    pol = P.default_policy_for_kind("sessions")
+    assert pol.reindex.on == "interval" and pol.reindex.every_hours == 24
+    assert pol.synopsis.enabled is False
+    assert pol.synopsis.downtime_hours == (2, 6)
+
+
+def test_small_corpus_kinds_default_to_source_change():
+    for kind in ("skills", "packages", "reports", "files"):
+        assert P.default_policy_for_kind(kind).reindex.on == "source-change"
+
+
+def test_entry_maintenance_overrides_kind_default():
+    pol = resolve_policy(
+        {"kind": "sessions", "maintenance": {"synopsis": {"enabled": True}}}
+    )
+    assert pol.synopsis.enabled is True  # overridden
+    assert pol.reindex.on == "interval"  # kept from the kind default
+
+
+def test_v1_entry_resolves_to_kind_default():
+    assert resolve_policy({"kind": "skills"}).reindex.on == "source-change"
+
+
+# --------------------------------------------------------------------------- #
+# Timing predicates
+# --------------------------------------------------------------------------- #
+
+
+def test_source_change_is_always_due():
+    assert is_reindex_due(MaintenancePolicy(), None, datetime(2026, 6, 16, 12)) is True
+
+
+def test_interval_due_only_when_stale():
+    pol = resolve_policy({"kind": "sessions"})  # interval 24h
+    now = datetime(2026, 6, 16, 12)
+    assert is_reindex_due(pol, None, now) is True
+    assert is_reindex_due(pol, now - timedelta(hours=1), now) is False
+    assert is_reindex_due(pol, now - timedelta(hours=25), now) is True
+
+
+def test_manual_is_never_due():
+    pol = resolve_policy({"kind": "x", "maintenance": {"reindex": {"on": "manual"}}})
+    assert is_reindex_due(pol, None, datetime(2026, 6, 16, 12)) is False
+
+
+def test_downtime_window_and_midnight_wrap():
+    day = resolve_policy(
+        {"kind": "s", "maintenance": {"synopsis": {"downtime_hours": [2, 6]}}}
+    )
+    assert in_downtime(day, datetime(2026, 6, 16, 3)) is True
+    assert in_downtime(day, datetime(2026, 6, 16, 7)) is False
+    wrap = resolve_policy(
+        {"kind": "s", "maintenance": {"synopsis": {"downtime_hours": [22, 6]}}}
+    )
+    assert in_downtime(wrap, datetime(2026, 6, 16, 23)) is True
+    assert in_downtime(wrap, datetime(2026, 6, 16, 5)) is True
+    assert in_downtime(wrap, datetime(2026, 6, 16, 12)) is False
+
+
+def test_no_downtime_window_is_always():
+    assert in_downtime(MaintenancePolicy(), datetime(2026, 6, 16, 12)) is True
+
+
+# --------------------------------------------------------------------------- #
+# Registry v2 persistence (isolated config dir)
+# --------------------------------------------------------------------------- #
+
+
+def test_register_persists_and_reconstructs_strategy(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    ir.register(
+        "notes",
+        "files",
+        root=str(tmp_path),
+        strategy=Chunked(chunk_size=900),
+        maintenance={"reindex": {"on": "interval", "every_hours": 12}},
+    )
+    entry = ir.corpora()["notes"]
+    assert entry["strategy"]["params"]["chunk_size"] == 900
+    assert entry["maintenance"]["reindex"]["every_hours"] == 12
+
+    from ir.registry import source_from_entry
+
+    src = source_from_entry("notes", entry)
+    assert isinstance(src.indexing_strategy, Chunked)
+    assert src.indexing_strategy.chunk_size == 900
+
+
+def test_register_rejects_bad_reindex_trigger(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    with pytest.raises(ValueError, match="unknown reindex trigger"):
+        ir.register("x", "skills", maintenance={"reindex": {"on": "whenever"}})
+
+
+# --------------------------------------------------------------------------- #
+# maintain: builds when due, no-ops within the interval (idempotent)
+# --------------------------------------------------------------------------- #
+
+
+def test_maintain_builds_then_noops_within_interval(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    monkeypatch.setenv("IR_DATA_DIR", str(tmp_path / "data"))
+    monkeypatch.setenv("IR_CACHE_DIR", str(tmp_path / "cache"))
+    docs = tmp_path / "docs"
+    docs.mkdir()
+    (docs / "a.md").write_text("alpha apple avocado", encoding="utf-8")
+    ir.register(
+        "n",
+        "files",
+        root=str(docs),
+        embedder="light",
+        maintenance={"reindex": {"on": "interval", "every_hours": 24}},
+    )
+
+    now = datetime(2026, 6, 16, 12)
+    (r1,) = ir.maintain("n", now=now)
+    assert r1.ran and r1.reindex and r1.records >= 1
+
+    (r2,) = ir.maintain("n", now=now + timedelta(hours=1))  # within interval
+    assert not r2.ran and "not due" in r2.reason
+
+    (r3,) = ir.maintain("n", now=now + timedelta(hours=25))  # past interval
+    assert r3.ran
+
+
+def test_maintain_defers_synopsis_build_outside_downtime(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    monkeypatch.setenv("IR_DATA_DIR", str(tmp_path / "data"))
+    monkeypatch.setenv("IR_CACHE_DIR", str(tmp_path / "cache"))
+    docs = tmp_path / "docs"
+    docs.mkdir()
+    (docs / "a.md").write_text("alpha", encoding="utf-8")
+    ir.register(
+        "n",
+        "files",
+        root=str(docs),
+        embedder="light",
+        maintenance={
+            "reindex": {"on": "source-change"},
+            "synopsis": {"enabled": True, "downtime_hours": [2, 6]},
+        },
+    )
+    # Noon is outside [2, 6): the (LLM-touching) synopsis build is deferred, so
+    # nothing is built — and crucially no aix/LLM is reached.
+    (r,) = ir.maintain("n", now=datetime(2026, 6, 16, 12))
+    assert not r.ran and "downtime" in r.reason
+    assert len(ir.open_corpus("n")) == 0
+
+
+def test_maintain_dry_run_does_not_build(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    monkeypatch.setenv("IR_DATA_DIR", str(tmp_path / "data"))
+    monkeypatch.setenv("IR_CACHE_DIR", str(tmp_path / "cache"))
+    docs = tmp_path / "docs"
+    docs.mkdir()
+    (docs / "a.md").write_text("alpha", encoding="utf-8")
+    ir.register("n", "files", root=str(docs), embedder="light")
+    (r,) = ir.maintain("n", now=datetime(2026, 6, 16, 12), dry_run=True)
+    assert not r.ran and "dry-run" in r.reason
+    # nothing built: corpus is still empty
+    assert len(ir.open_corpus("n")) == 0
diff --git a/tests/test_sessions_corpus.py b/tests/test_sessions_corpus.py
new file mode 100644
index 0000000..f15686c
--- /dev/null
+++ b/tests/test_sessions_corpus.py
@@ -0,0 +1,139 @@
+"""Claude Code sessions corpus — ClaudeTurn strategy + from_claude_sessions (#57).
+
+The transcript parsing lives in ``priv``; here we inject a ``fetcher`` of
+turn-pair records so the ir-side corpus (surfaces, filter fields, presets) is
+tested without ``priv`` or real transcripts.
+"""
+
+import ir
+from ir.store import CorpusStore
+from ir.strategy import ClaudeTurn, strategy_from_spec, strategy_to_spec
+
+FAKE = [
+    {
+        "id": "s1:u1",
+        "user_prompt": "how do I deploy the app to the server",
+        "assistant_summary": "I deployed it via the deploy script and verified it live",
+        "assistant_full": "Let me check the config... I deployed it via the deploy "
+        "script and verified it live",
+        "session_id": "s1",
+        "cwd": "/x/myproj",
+        "project": "myproj",
+        "git_branch": "main",
+        "timestamp": "2026-06-10T10:00:00Z",
+        "model": "claude-opus-4-8",
+        "has_tool_use": True,
+    },
+    {
+        "id": "s1:u2",
+        "user_prompt": "fix the failing numpy import error",
+        "assistant_summary": "The numpy 2.x ABI mismatch caused it; pinned numpy "
+        "below 2 and the tests pass now",
+        "assistant_full": "Investigating the traceback... The numpy 2.x ABI mismatch "
+        "caused it; pinned numpy below 2 and the tests pass now",
+        "session_id": "s1",
+        "cwd": "/x/myproj",
+        "project": "myproj",
+        "git_branch": "main",
+        "timestamp": "2026-06-11T09:00:00Z",
+        "model": "claude-opus-4-8",
+        "has_tool_use": False,
+    },
+    {
+        "id": "s2:u1",
+        "user_prompt": "add a dark mode toggle",
+        "assistant_summary": "Added a dark mode toggle wired to a theme context",
+        "assistant_full": "Added a dark mode toggle wired to a theme context",
+        "session_id": "s2",
+        "cwd": "/x/other",
+        "project": "other",
+        "git_branch": "dev",
+        "timestamp": "2026-06-12T08:00:00Z",
+        "model": "claude-sonnet-4-6",
+        "has_tool_use": True,
+    },
+]
+
+
+def _corpus(**kw):
+    src = ir.CorpusSource.from_claude_sessions(fetcher=lambda: FAKE, **kw)
+    return ir.build(src, store=CorpusStore.memory(), embedder="light")
+
+
+def test_claudeturn_default_surfaces_and_filter_fields():
+    plan = ClaudeTurn().decompose("s1:u1", FAKE[0])
+    assert {s.kind for s in plan.surfaces} == {"user_prompt", "assistant_summary"}
+    assert plan.filter_fields["project"] == "myproj"
+    assert plan.filter_fields["has_tool_use"] is True
+    assert plan.filter_fields["model"] == "claude-opus-4-8"
+
+
+def test_claudeturn_include_full_adds_surface():
+    plan = ClaudeTurn(include_full=True).decompose("s1:u1", FAKE[0])
+    assert "assistant_full" in {s.kind for s in plan.surfaces}
+
+
+def test_claudeturn_session_title_record_indexes_the_title():
+    rec = {
+        "id": "s9:__title__",
+        "record_type": "session_title",
+        "session_title": "Fixing the numpy ABI crash",
+        "session_id": "s9",
+        "project": "ir",
+        "user_prompt": "",
+        "assistant_summary": "",
+    }
+    plan = ClaudeTurn().decompose("s9:__title__", rec)
+    assert {s.kind for s in plan.surfaces} == {"session_title"}
+    assert plan.surfaces[0].text == "Fixing the numpy ABI crash"
+    assert plan.filter_fields["session_title"] == "Fixing the numpy ABI crash"
+
+
+def test_claudeturn_turn_record_carries_session_title_as_metadata():
+    rec = dict(FAKE[0], session_title="the deploy session")
+    plan = ClaudeTurn().decompose(rec["id"], rec)
+    # a turn record still indexes user/assistant — not a title surface
+    assert {s.kind for s in plan.surfaces} == {"user_prompt", "assistant_summary"}
+    assert plan.filter_fields["session_title"] == "the deploy session"
+
+
+def test_build_indexes_two_surfaces_per_turn():
+    corpus = _corpus()
+    assert len(corpus) == 3 * 2  # 3 turns × {user_prompt, assistant_summary}
+
+
+def test_search_targets_assistant_side():
+    corpus = _corpus()
+    hits = corpus.search(
+        "numpy abi mismatch", surfaces={"assistant_summary"}, mode="lexical", k=3
+    )
+    assert hits and hits[0].artifact_id == "s1:u2"
+
+
+def test_search_targets_user_side():
+    corpus = _corpus()
+    hits = corpus.search(
+        "deploy the app", surfaces={"user_prompt"}, mode="lexical", k=3
+    )
+    assert hits and hits[0].artifact_id == "s1:u1"
+
+
+def test_filter_by_project_discriminates():
+    corpus = _corpus()
+    hits = corpus.search("toggle", filter={"project": "other"}, mode="lexical", k=5)
+    assert hits and all(h.metadata.get("project") == "other" for h in hits)
+
+
+def test_claudeturn_spec_roundtrips_for_registry_persistence():
+    spec = strategy_to_spec(ClaudeTurn(include_full=True))
+    assert spec["name"] == "ClaudeTurn" and spec["params"]["include_full"] is True
+    s2 = strategy_from_spec(spec)
+    assert isinstance(s2, ClaudeTurn) and s2.include_full is True
+
+
+def test_sessions_preset_resolves_and_has_interval_policy(tmp_path, monkeypatch):
+    monkeypatch.setenv("IR_CONFIG_DIR", str(tmp_path / "cfg"))
+    # The sessions kind's smart default is interval reindex with a downtime window.
+    pol = ir.default_policy_for_kind("sessions")
+    assert pol.reindex.on == "interval"
+    assert pol.synopsis.downtime_hours == (2, 6)
diff --git a/tests/test_store_packed.py b/tests/test_store_packed.py
new file mode 100644
index 0000000..b255cbb
--- /dev/null
+++ b/tests/test_store_packed.py
@@ -0,0 +1,100 @@
+"""Packed-matrix disk cache + vector-free ``metas()`` — the #56 perf paths.
+
+These exercise the file-backed (``packed_dir``) behavior that the in-memory
+store in ``test_store.py`` cannot: persisting one normalized matrix so a fresh
+process reopens it with a few reads instead of a per-record vector-file storm,
+and serving ``metas()`` without touching vectors (for lexical-only ranking).
+"""
+
+import numpy as np
+
+from ir.base import Record
+from ir.store import CorpusStore, _json_store, _ndarray_store
+
+
+def _rec(rid, *, vec=(1.0, 0.0, 0.0), text="t"):
+    return Record(
+        id=rid,
+        artifact_id=f"art_{rid}",
+        surface_kind="document",
+        surface_index=0,
+        text=text,
+        vector=np.asarray(vec, dtype=np.float32),
+        metadata={"owner": "ours"},
+    )
+
+
+def _file_store(tmp_path):
+    """A file-backed store with the on-disk packed cache enabled."""
+    root = tmp_path / "corpus"
+    store = CorpusStore(
+        meta=_json_store(root / "meta"),
+        vectors=_ndarray_store(root / "vectors"),
+        ledger=_json_store(root / "ledger"),
+        config=_json_store(root / "config"),
+        calibration=_json_store(root / "calibration"),
+        links=_json_store(root / "links"),
+        packed_dir=root / "matrix",
+    )
+    return store, root
+
+
+def test_packed_cache_written_then_reloaded_by_fresh_store(tmp_path):
+    store, root = _file_store(tmp_path)
+    store.put_record(_rec("r1", vec=(3.0, 0.0, 0.0)))
+    store.put_record(_rec("r2", vec=(0.0, 4.0, 0.0)))
+    ids, mat, metas = store.matrix()  # builds from records + persists packed
+    assert (root / "matrix" / "sig.json").exists()
+
+    # A brand-new store over the same dir must reload from the packed cache and
+    # return identical ids / normalized matrix / metas.
+    store2, _ = _file_store(tmp_path)
+    ids2, mat2, metas2 = store2.matrix()
+    assert ids2 == ids
+    np.testing.assert_allclose(np.asarray(mat2), np.asarray(mat), atol=1e-6)
+    assert metas2 == metas
+
+
+def test_packed_cache_cleared_on_write_then_rebuilt(tmp_path):
+    store, root = _file_store(tmp_path)
+    store.put_record(_rec("r1"))
+    store.matrix()
+    sig = root / "matrix" / "sig.json"
+    assert sig.exists()
+
+    store.put_record(_rec("r2"))  # any write invalidates the packed cache
+    assert not sig.exists()
+
+    ids, mat, _ = store.matrix()  # rebuilds with both rows + re-persists
+    assert mat.shape[0] == 2
+    assert sig.exists()
+
+
+def test_metas_matches_matrix_metas(tmp_path):
+    store, _ = _file_store(tmp_path)
+    store.put_record(_rec("r1", text="alpha"))
+    store.put_record(_rec("r2", text="beta"))
+    ids_m, metas_m = store.metas()  # vector-free path
+    ids, _mat, metas = store.matrix()
+    assert ids_m == ids
+    assert metas_m == metas
+
+
+def test_corrupt_packed_cache_falls_back_to_rebuild(tmp_path):
+    store, root = _file_store(tmp_path)
+    store.put_record(_rec("r1"))
+    store.matrix()
+    # A torn / corrupt sig must read as invalid → rebuild, never raise.
+    (root / "matrix" / "sig.json").write_text("not json", encoding="utf-8")
+    store2, _ = _file_store(tmp_path)
+    ids, mat, metas = store2.matrix()
+    assert mat.shape[0] == 1
+
+
+def test_memory_store_keeps_purely_in_process(tmp_path):
+    store = CorpusStore.memory()
+    assert store._packed_dir is None  # no disk cache for the in-memory store
+    store.put_record(_rec("r1"))
+    assert store.matrix()[1].shape == (1, 3)
+    ids, metas = store.metas()
+    assert ids == ["r1"] and len(metas) == 1
diff --git a/tests/test_synopsis.py b/tests/test_synopsis.py
index 192ad5f..11542df 100644
--- a/tests/test_synopsis.py
+++ b/tests/test_synopsis.py
@@ -10,7 +10,7 @@
 - incremental rebuild re-synthesizes only changed artifacts, and a
   synthesizer-identity (or inner-strategy) change re-synthesizes — staleness via
   the ledger ``strategy_id`` and the recursive ``_strategy_id``.
-- offline import preserved: ``oa`` is lazy, an injected synthesizer never needs it.
+- offline import preserved: ``aix`` is lazy, an injected synthesizer never needs it.
 
 Hermetic: light embedder + memory store + injected synthesizers.
 """
@@ -187,7 +187,7 @@ def test_strategy_id_recurses_into_inner_strategy_and_synthesizer():
 
 
 # --------------------------------------------------------------------------- #
-# Offline: oa is lazy; an injected synthesizer never needs it
+# Offline: aix is lazy; an injected synthesizer never needs it
 # --------------------------------------------------------------------------- #
 
 
@@ -209,15 +209,15 @@ def boom(text):
 
 
 def test_default_synthesizer_constructs_offline_and_stamps_identity():
-    # No synthesize injected -> the lazy-oa default; constructing must not need oa,
-    # and an empty-text artifact short-circuits before the oa path is reached.
+    # No synthesize injected -> the lazy-aix default; constructing must not need
+    # aix, and an empty-text artifact short-circuits before the aix path is reached.
     strat = with_synopsis(ir.Chunked())
-    assert strat.synthesizer_id.startswith("oa:")
+    assert strat.synthesizer_id.startswith("aix:")
     assert strat.synthesize(Artifact("x", "")) == ""
     # a prompt change shifts the default identity (re-synthesis trigger)
     a = make_llm_synthesizer()
     b = make_llm_synthesizer(prompt="a very different prompt {text}")
-    assert a.synthesizer_id.startswith("oa:") and a.synthesizer_id != b.synthesizer_id
+    assert a.synthesizer_id.startswith("aix:") and a.synthesizer_id != b.synthesizer_id
 
 
 # --------------------------------------------------------------------------- #
@@ -313,25 +313,25 @@ def test_with_synopsis_package_prepends_synopsis_before_description_and_routes()
 
 
 def test_default_synthesizer_construction_is_lazy_and_offline(monkeypatch):
-    # Mutation-resistant offline guarantee: poison oa so any use raises, then
+    # Mutation-resistant offline guarantee: poison aix so any use raises, then
     # confirm constructing the default synthesizer (and the wrapper) and the
     # injected path never reach it. An eager-import regression would fail here.
     import sys
     import types
 
-    poison = types.ModuleType("oa")
+    poison = types.ModuleType("aix")
 
     def _boom(*a, **k):
-        raise AssertionError("oa.prompt_function reached on an offline path")
+        raise AssertionError("aix.prompt_func reached on an offline path")
 
-    poison.prompt_function = _boom
-    monkeypatch.setitem(sys.modules, "oa", poison)
+    poison.prompt_func = _boom
+    monkeypatch.setitem(sys.modules, "aix", poison)
 
     synth = make_llm_synthesizer()  # lazy: builds nothing yet
-    strat = with_synopsis(ir.Chunked())  # default synth: still no oa
-    assert synth(Artifact("x", "")) == ""  # empty text short-circuits the oa path
+    strat = with_synopsis(ir.Chunked())  # default synth: still no aix
+    assert synth(Artifact("x", "")) == ""  # empty text short-circuits the aix path
     assert strat.synthesize(Artifact("y", "")) == ""
-    # an injected summarizer is wholly oa-free even on non-empty text
+    # an injected summarizer is wholly aix-free even on non-empty text
     assert make_llm_synthesizer(summarize=lambda t: "S")(Artifact("z", "body")) == "S"