i2mint · thorwhalen · Jun 16, 2026 · Jun 16, 2026
diff --git a/README.md b/README.md
@@ -13,7 +13,8 @@ is the central selection risk — fewer, better candidates beat more), and
 import ir
 
 # Define a corpus, build the index (incremental), then discover:
-source = ir.CorpusSource.from_skills()       # or from_packages(), from_md_reports(), from_files(...)
+source = ir.CorpusSource.from_skills()       # or from_packages(), from_md_reports(),
+                                              # from_claude_sessions(), from_files(...)
 corpus = ir.build(source)                     # embed + persist under XDG dirs
 result = ir.discover(corpus, "how do I deploy the app to the server")
 
@@ -242,8 +243,11 @@ ir search skills "deploy the app"        # rank candidates (retrieval only)
 ir discover skills "deploy the app"      # retrieve -> select
 ir discover skills "deploy the app" --disclose       # + load bodies
 ir discover skills "deploy the app" --min-score auto # + calibrated abstention
+ir build sessions                        # index recent Claude Code sessions (turn pairs)
+ir search sessions "numpy abi error" --mode lexical   # find past sessions
 ir ls                                    # list corpora + record counts
-ir info skills                           # config, stats, calibrated floors
+ir info skills                           # config, stats, policy, calibrated floors
+ir maintain --all                        # run due background work (idempotent; cron-friendly)
 ir register notes files --root ~/notes --pattern '.*\.md$'  # register a custom corpus
 ir rm notes                              # unregister (keeps built data)
 ir eval-gen skills skills_eval.jsonl     # generate eval cases (needs oa/LLM)

diff --git a/ir/__init__.py b/ir/__init__.py
@@ -45,7 +45,15 @@
     default_edge_extractor,
 )
 from .index import Corpus, build, open_corpus
-from .registry import retriever_for, retrievers
+from .maintenance import MaintenanceResult, maintain, maintain_corpus
+from .policy import (
+    MaintenancePolicy,
+    ReindexPolicy,
+    SynopsisPolicy,
+    default_policy_for_kind,
+    resolve_policy,
+)
+from .registry import policy_for, retriever_for, retrievers
 from .retrieve import Retriever, as_retriever, fuse_hits, records_for_artifact
 from .retrieve import search as _search
 from .select import (
@@ -58,7 +66,14 @@
 )
 from .sources import CorpusSource
 from .store import CorpusStore
-from .strategy import Chunked, IndexingStrategy, Package, Skill, WholeText
+from .strategy import (
+    Chunked,
+    ClaudeTurn,
+    IndexingStrategy,
+    Package,
+    Skill,
+    WholeText,
+)
 from .synopsis import Synthesizer, make_llm_synthesizer, with_synopsis
 from .traverse import WalkPolicy, WalkState, collapsed_tree_policy, traverse
 
@@ -73,6 +88,7 @@
     "Chunked",
     "Skill",
     "Package",
+    "ClaudeTurn",
     "with_synopsis",
     "make_llm_synthesizer",
     "Synthesizer",
@@ -114,6 +130,15 @@
     "register",
     "corpora",
     "build_corpus",
+    "maintain",
+    "maintain_corpus",
+    "MaintenanceResult",
+    "MaintenancePolicy",
+    "ReindexPolicy",
+    "SynopsisPolicy",
+    "resolve_policy",
+    "default_policy_for_kind",
+    "policy_for",
 ]
 
 register = registry.register
@@ -148,6 +173,6 @@ def search(corpus, query, **kwargs):
 # The evaluation harness is reachable as ``ir.eval`` (its ``ef`` imports are
 # lazy, so this does not weigh down ``import ir``). Kept out of ``__all__`` so a
 # star-import does not shadow the ``eval`` builtin. ``ir.eval_gen`` is the
-# build-time case generator (its ``oa`` import is lazy too).
+# build-time case generator (its ``aix`` import is lazy too).
 from . import eval  # noqa: E402,F401  (submodule attribute: ir.eval)
 from . import eval_gen  # noqa: E402,F401  (submodule attribute: ir.eval_gen)
diff --git a/ir/__main__.py b/ir/__main__.py
@@ -10,7 +10,28 @@ def main():
     import argh
 
     parser = argh.ArghParser()
-    argh.add_commands(parser, COMMANDS)
+    # argh >= 0.30 requires an explicit name-mapping policy as soon as a command
+    # has an *optional positional* (e.g. ``maintain(name=None, ...)``).
+    # ``BY_NAME_IF_KWONLY`` keeps positional params positional and maps
+    # keyword-only params to options — exactly ir's existing command convention,
+    # so it changes nothing for the other commands. Fall back gracefully on
+    # older argh that lacks the policy.
+    try:
+        policy = argh.NameMappingPolicy.BY_NAME_IF_KWONLY
+    except AttributeError:
+        try:
+            from argh.assembling import NameMappingPolicy
+
+            policy = NameMappingPolicy.BY_NAME_IF_KWONLY
+        except ImportError:
+            policy = None
+    try:
+        if policy is not None:
+            argh.add_commands(parser, COMMANDS, name_mapping_policy=policy)
+        else:
+            argh.add_commands(parser, COMMANDS)
+    except TypeError:  # very old argh without the kwarg
+        argh.add_commands(parser, COMMANDS)
     parser.dispatch()
 
 

diff --git a/ir/cli.py b/ir/cli.py
@@ -11,7 +11,7 @@
     ir info packages                # config + stats for a corpus
     ir register notes files --root ~/notes --pattern '.*\\.md$'
     ir rm notes                     # unregister (keeps built data)
-    ir eval-gen skills skills_eval.jsonl --k 5        # generate cases (needs oa/LLM)
+    ir eval-gen skills skills_eval.jsonl --k 5        # generate cases (needs aix/LLM)
     ir eval skills skills_eval.jsonl --mode hybrid    # score retrieval on a case file
     ir eval-select skills skills_eval.jsonl           # score the selection stage
     ir sweep-select skills skills_eval.jsonl          # tune max_k/rel for the selector
@@ -63,14 +63,38 @@ def ls():
     return "\n".join(lines)
 
 
-def register(name, kind, *, root=None, pattern=None, embedder="default"):
-    """Register a named corpus. kind: skills | packages | reports | files."""
+def register(
+    name,
+    kind,
+    *,
+    root=None,
+    pattern=None,
+    embedder="default",
+    reindex_on=None,
+    every_hours=None,
+    synopsis=False,
+):
+    """Register a named corpus. kind: skills | packages | reports | files.
+
+    Background-work policy (optional; smart per-kind defaults otherwise — see
+    `ir.policy`): reindex_on (source-change | interval | manual), every_hours
+    (implies interval and overrides reindex_on), synopsis (enable LLM synopses,
+    run only in the policy's downtime window by `ir maintain`).
+    """
     params = {}
     if root:
         params["root"] = root
     if pattern:
         params["pattern"] = pattern
-    registry.register(name, kind, embedder=embedder, **params)
+    maintenance = None
+    if reindex_on or every_hours or synopsis:
+        reindex = {}
+        if every_hours:
+            reindex = {"on": "interval", "every_hours": float(every_hours)}
+        elif reindex_on:
+            reindex = {"on": reindex_on}
+        maintenance = {"reindex": reindex, "synopsis": {"enabled": bool(synopsis)}}
+    registry.register(name, kind, embedder=embedder, maintenance=maintenance, **params)
     return f"registered {name!r} (kind={kind}, embedder={embedder})"
 
 
@@ -158,18 +182,45 @@ def discover(
 
 
 def info(name):
-    """Show a corpus's stored config, stats, and any calibrated abstention floors."""
+    """Show a corpus's stored config, stats, policy, and any abstention floors."""
     corpus = open_corpus(name)
     cfg = corpus.store.get_config()
     reg = registry.get(name)
     calibrated = corpus.store.calibration_modes()
     floors = {m: corpus.store.get_calibration(m).get("min_score") for m in calibrated}
     cal = f"\nmin_score floors: {floors}" if floors else ""
+    pol = registry.policy_for(name)
+    state = corpus.store.get_maintenance_state()
+    last = state.get("last_maintained", "never")
+    syn = pol.synopsis
+    syn_str = f"enabled, scope={syn.scope}/{syn.window_days}d" if syn.enabled else "off"
+    window = f", downtime={syn.downtime_hours}" if syn.downtime_hours else ""
+    policy_str = (
+        f"\npolicy: reindex={pol.reindex.on}"
+        + (f"/{pol.reindex.every_hours}h" if pol.reindex.every_hours else "")
+        + f", synopsis={syn_str}{window}\nlast maintained: {last}"
+    )
     return (
-        f"name: {name}\nregistered: {reg}\nrecords: {len(corpus)}\nconfig: {cfg}{cal}"
+        f"name: {name}\nregistered: {reg}\nrecords: {len(corpus)}\n"
+        f"config: {cfg}{policy_str}{cal}"
     )
 
 
+def maintain(name=None, *, all=False, dry_run=False):
+    """Run due background work: incremental reindex, synopsis in its downtime window.
+
+    With a name, maintains that corpus; with --all (or no name), every registered
+    corpus. Idempotent and safe to schedule (cron/launchd): work that is not due
+    is skipped. --dry-run reports what would run without doing it.
+    """
+    from .maintenance import maintain as _maintain
+
+    results = _maintain(name=name, all=all, dry_run=dry_run)
+    if not results:
+        return "no corpora registered"
+    return "\n".join(str(r) for r in results)
+
+
 def rm(name):
     """Unregister a corpus (does not delete its built data)."""
     registry.unregister(name)
@@ -202,12 +253,12 @@ def eval(name, cases, *, mode="hybrid", k=10):
 
 
 def eval_gen(name, out, *, k=5, abstention_frac=0.15, max_artifacts=None):
-    """Generate an eval-case file for a corpus by back-translation (needs oa/LLM).
+    """Generate an eval-case file for a corpus by back-translation (needs aix/LLM).
 
     Writes a DiscoveryCase JSONL set (gold cases + an abstention slice) for the
     registered corpus *name* to *out*, stamping a corpus-signature into the
     header so the frozen file can be checked against the live corpus later. This
-    command calls an LLM via oa; scoring it afterwards (`ir eval`) is offline.
+    command calls an LLM via aix; scoring it afterwards (`ir eval`) is offline.
     """
     from .eval import save_cases
     from .eval_gen import build_eval_set, corpus_signature
@@ -375,6 +426,7 @@ def calibrate_min_score(
     search,
     discover,
     info,
+    maintain,
     rm,
     eval,
     eval_gen,

diff --git a/ir/eval_gen.py b/ir/eval_gen.py
@@ -19,9 +19,9 @@
 The LLM is **injected** (`query_generator` / `abstention_generator` callables),
 so the generation *logic* — masking, gold assignment, the leakage guard, the
 abstention fraction — is fully testable with a deterministic stub and no network.
-The default generators are built lazily on :mod:`oa` (`oa.prompt_function`), so
-``import ir.eval_gen`` stays cheap and offline; ``oa`` is only imported when you
-actually generate with the real LLM.
+The default generators are built lazily on :mod:`aix` (`aix.prompt_func`, the
+multi-provider LLM facade), so ``import ir.eval_gen`` stays cheap and offline;
+``aix`` is only imported when you actually generate with the real LLM.
 
 The output is plain :class:`~ir.eval.DiscoveryCase` data — freeze it with
 :func:`ir.eval.save_cases` (stamping :func:`corpus_signature` into the
@@ -34,7 +34,7 @@
     from ir import eval_gen as eg
 
     source = ir.CorpusSource.from_skills()
-    cases = eg.build_eval_set(source, k=5, corpus_name="skills")   # uses oa
+    cases = eg.build_eval_set(source, k=5, corpus_name="skills")   # uses aix
     from ir.eval import save_cases
     save_cases(cases, "skills_eval.jsonl",
                meta={"corpus": "skills", "corpus_signature": eg.corpus_signature(source)})
@@ -173,7 +173,7 @@ def _leaks_name(text: str, name: str) -> bool:
 
 
 # =========================================================================== #
-# Default (oa-backed) generators — lazily built, only when actually used
+# Default (aix-backed) generators — lazily built, only when actually used
 # =========================================================================== #
 
 
@@ -199,13 +199,13 @@ def _parse_lines(text: Any) -> list[str]:
     return lines
 
 
-def make_oa_query_generator(
+def make_default_query_generator(
     *, prompt: str = BACKTRANSLATION_PROMPT, **prompt_function_kwargs: Any
 ) -> QueryGenerator:
-    """Build the default back-translation generator on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default back-translation generator on :mod:`aix` (lazy import)."""
+    import aix
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="backtranslate", **prompt_function_kwargs
     )
 
@@ -215,13 +215,13 @@ def generate(description: str, *, n: int) -> list[str]:
     return generate
 
 
-def make_oa_abstention_generator(
+def make_default_abstention_generator(
     *, prompt: str = ABSTENTION_PROMPT, **prompt_function_kwargs: Any
 ) -> AbstentionGenerator:
-    """Build the default abstention generator on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default abstention generator on :mod:`aix` (lazy import)."""
+    import aix
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="abstention", **prompt_function_kwargs
     )
 
@@ -287,7 +287,7 @@ def generate_cases(
         mask_names: scrub the artifact name from the description before
             generating, and drop any generated intent that still contains it.
         query_generator: ``(description, *, n) -> [intent, …]``. Defaults to the
-            :mod:`oa`-backed back-translator (built lazily; needs a model).
+            :mod:`aix`-backed back-translator (built lazily; needs a model).
         describe: ``raw -> description`` (default: the ``description`` / ``text``
             field, else the joined string fields).
         min_chars: skip artifacts whose description is shorter than this.
@@ -304,7 +304,7 @@ def generate_cases(
     """
     if k < 1:
         raise ValueError(f"k must be >= 1, got {k!r}.")
-    gen = query_generator or make_oa_query_generator()
+    gen = query_generator or make_default_query_generator()
     describe = describe or _default_describe
     cases: list[DiscoveryCase] = []
     skipped = 0
@@ -368,7 +368,7 @@ def generate_abstention_cases(
     """Generate ``n`` abstention cases — out-of-scope intents (empty ``gold``)."""
     if n <= 0:
         return []
-    gen = generator or make_oa_abstention_generator()
+    gen = generator or make_default_abstention_generator()
     intents = gen(n=n, theme=theme)
     cases = [
         DiscoveryCase(

diff --git a/ir/formulate.py b/ir/formulate.py
@@ -17,7 +17,7 @@
 lives in the agent layer (``raglab``), not here.
 
 :func:`make_llm_formulator` mirrors :func:`ir.select.make_llm_selector`: an
-injectable ``rewriter`` callable, built lazily on :mod:`oa` when omitted (so
+injectable ``rewriter`` callable, built lazily on :mod:`aix` when omitted (so
 importing ir stays offline), falling back to identity on any failure — a
 formulator must never make retrieval *worse* than the raw query.
 """
@@ -47,13 +47,13 @@ def identity_formulator(query: str) -> str:
 
 
 def _default_llm_rewriter(prompt: str, n: int, **prompt_function_kwargs: Any):
-    """Build the default LLM rewriter on :mod:`oa` (lazy import)."""
-    import oa
+    """Build the default LLM rewriter on :mod:`aix` (lazy import)."""
+    import aix
 
     def _parse_lines(text: str) -> list[str]:
         return [line.strip(" -\t") for line in str(text).splitlines() if line.strip()]
 
-    fn = oa.prompt_function(
+    fn = aix.prompt_func(
         prompt, egress=_parse_lines, name="formulate_queries", **prompt_function_kwargs
     )
 
@@ -74,8 +74,8 @@ def make_llm_formulator(
     """An LLM-backed :data:`Formulator` (rewrite / expand / multi-query).
 
     ``rewriter`` is an injectable ``query -> str | [str, ...]`` callable (a test
-    double, or your own router); when omitted it is built lazily on :mod:`oa`
-    (``oa.prompt_function``), so importing this module stays offline. ``n`` is the
+    double, or your own router); when omitted it is built lazily on :mod:`aix`
+    (``aix.prompt_func``), so importing this module stays offline. ``n`` is the
     multi-query fan-out width. Any error or empty reply falls back to ``fallback``
     (default: :func:`identity_formulator`).
     """