Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ is the central selection risk — fewer, better candidates beat more), and
import ir

# Define a corpus, build the index (incremental), then discover:
source = ir.CorpusSource.from_skills() # or from_packages(), from_md_reports(), from_files(...)
source = ir.CorpusSource.from_skills() # or from_packages(), from_md_reports(),
# from_claude_sessions(), from_files(...)
corpus = ir.build(source) # embed + persist under XDG dirs
result = ir.discover(corpus, "how do I deploy the app to the server")

Expand Down Expand Up @@ -242,8 +243,11 @@ ir search skills "deploy the app" # rank candidates (retrieval only)
ir discover skills "deploy the app" # retrieve -> select
ir discover skills "deploy the app" --disclose # + load bodies
ir discover skills "deploy the app" --min-score auto # + calibrated abstention
ir build sessions # index recent Claude Code sessions (turn pairs)
ir search sessions "numpy abi error" --mode lexical # find past sessions
ir ls # list corpora + record counts
ir info skills # config, stats, calibrated floors
ir info skills # config, stats, policy, calibrated floors
ir maintain --all # run due background work (idempotent; cron-friendly)
ir register notes files --root ~/notes --pattern '.*\.md$' # register a custom corpus
ir rm notes # unregister (keeps built data)
ir eval-gen skills skills_eval.jsonl # generate eval cases (needs oa/LLM)
Expand Down
31 changes: 28 additions & 3 deletions ir/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,15 @@
default_edge_extractor,
)
from .index import Corpus, build, open_corpus
from .registry import retriever_for, retrievers
from .maintenance import MaintenanceResult, maintain, maintain_corpus
from .policy import (
MaintenancePolicy,
ReindexPolicy,
SynopsisPolicy,
default_policy_for_kind,
resolve_policy,
)
from .registry import policy_for, retriever_for, retrievers
from .retrieve import Retriever, as_retriever, fuse_hits, records_for_artifact
from .retrieve import search as _search
from .select import (
Expand All @@ -58,7 +66,14 @@
)
from .sources import CorpusSource
from .store import CorpusStore
from .strategy import Chunked, IndexingStrategy, Package, Skill, WholeText
from .strategy import (
Chunked,
ClaudeTurn,
IndexingStrategy,
Package,
Skill,
WholeText,
)
from .synopsis import Synthesizer, make_llm_synthesizer, with_synopsis
from .traverse import WalkPolicy, WalkState, collapsed_tree_policy, traverse

Expand All @@ -73,6 +88,7 @@
"Chunked",
"Skill",
"Package",
"ClaudeTurn",
"with_synopsis",
"make_llm_synthesizer",
"Synthesizer",
Expand Down Expand Up @@ -114,6 +130,15 @@
"register",
"corpora",
"build_corpus",
"maintain",
"maintain_corpus",
"MaintenanceResult",
"MaintenancePolicy",
"ReindexPolicy",
"SynopsisPolicy",
"resolve_policy",
"default_policy_for_kind",
"policy_for",
]

register = registry.register
Expand Down Expand Up @@ -148,6 +173,6 @@ def search(corpus, query, **kwargs):
# The evaluation harness is reachable as ``ir.eval`` (its ``ef`` imports are
# lazy, so this does not weigh down ``import ir``). Kept out of ``__all__`` so a
# star-import does not shadow the ``eval`` builtin. ``ir.eval_gen`` is the
# build-time case generator (its ``oa`` import is lazy too).
# build-time case generator (its ``aix`` import is lazy too).
from . import eval # noqa: E402,F401 (submodule attribute: ir.eval)
from . import eval_gen # noqa: E402,F401 (submodule attribute: ir.eval_gen)
23 changes: 22 additions & 1 deletion ir/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,28 @@ def main():
import argh

parser = argh.ArghParser()
argh.add_commands(parser, COMMANDS)
# argh >= 0.30 requires an explicit name-mapping policy as soon as a command
# has an *optional positional* (e.g. ``maintain(name=None, ...)``).
# ``BY_NAME_IF_KWONLY`` keeps positional params positional and maps
# keyword-only params to options — exactly ir's existing command convention,
# so it changes nothing for the other commands. Fall back gracefully on
# older argh that lacks the policy.
try:
policy = argh.NameMappingPolicy.BY_NAME_IF_KWONLY
except AttributeError:
try:
from argh.assembling import NameMappingPolicy

policy = NameMappingPolicy.BY_NAME_IF_KWONLY
except ImportError:
policy = None
try:
if policy is not None:
argh.add_commands(parser, COMMANDS, name_mapping_policy=policy)
else:
argh.add_commands(parser, COMMANDS)
except TypeError: # very old argh without the kwarg
argh.add_commands(parser, COMMANDS)
parser.dispatch()


Expand Down
68 changes: 60 additions & 8 deletions ir/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
ir info packages # config + stats for a corpus
ir register notes files --root ~/notes --pattern '.*\\.md$'
ir rm notes # unregister (keeps built data)
ir eval-gen skills skills_eval.jsonl --k 5 # generate cases (needs oa/LLM)
ir eval-gen skills skills_eval.jsonl --k 5 # generate cases (needs aix/LLM)
ir eval skills skills_eval.jsonl --mode hybrid # score retrieval on a case file
ir eval-select skills skills_eval.jsonl # score the selection stage
ir sweep-select skills skills_eval.jsonl # tune max_k/rel for the selector
Expand Down Expand Up @@ -63,14 +63,38 @@ def ls():
return "\n".join(lines)


def register(
    name,
    kind,
    *,
    root=None,
    pattern=None,
    embedder="default",
    reindex_on=None,
    every_hours=None,
    synopsis=False,
):
    """Register a named corpus. kind: skills | packages | reports | files.

    Source options: root and pattern are forwarded to the registry only when
    they are set (truthy).

    Background-work policy (optional): reindex_on (source-change | interval |
    manual), every_hours (implies reindex_on=interval and takes precedence over
    an explicit reindex_on), synopsis (enable LLM synopses). When none of the
    three is given, no maintenance override is stored (maintenance=None).

    Returns a one-line confirmation string.
    """
    # Forward only the source parameters the caller actually supplied.
    params = {
        key: value for key, value in (("root", root), ("pattern", pattern)) if value
    }

    # every_hours wins over reindex_on: an interval length only makes sense
    # for interval-driven reindexing.
    if every_hours:
        reindex = {"on": "interval", "every_hours": float(every_hours)}
    elif reindex_on:
        reindex = {"on": reindex_on}
    else:
        reindex = {}

    if reindex or synopsis:
        # NOTE(review): the synopsis flag is written explicitly (possibly as
        # False) whenever any policy option is set — confirm this does not
        # mask a per-kind synopsis default downstream.
        maintenance = {"reindex": reindex, "synopsis": {"enabled": bool(synopsis)}}
    else:
        maintenance = None

    registry.register(name, kind, embedder=embedder, maintenance=maintenance, **params)
    return f"registered {name!r} (kind={kind}, embedder={embedder})"


Expand Down Expand Up @@ -158,18 +182,45 @@ def discover(


def info(name):
    """Show a corpus's stored config, stats, policy, and any abstention floors.

    Returns a newline-separated report: name, registry entry, record count,
    stored config, the maintenance policy (reindex trigger, synopsis
    settings), the last-maintained marker ("never" when the store has none),
    and — only when at least one mode is calibrated — the min_score floors.
    """
    corpus = open_corpus(name)
    cfg = corpus.store.get_config()
    reg = registry.get(name)

    # One floor per calibrated retrieval mode.
    floors = {
        mode: corpus.store.get_calibration(mode).get("min_score")
        for mode in corpus.store.calibration_modes()
    }

    pol = registry.policy_for(name)
    last = corpus.store.get_maintenance_state().get("last_maintained", "never")

    # Reindex trigger, with the interval length appended when one is set.
    reindex_str = f"{pol.reindex.on}"
    if pol.reindex.every_hours:
        reindex_str += f"/{pol.reindex.every_hours}h"

    # Synopsis summary, plus the downtime window when one is configured.
    syn = pol.synopsis
    if syn.enabled:
        syn_str = f"enabled, scope={syn.scope}/{syn.window_days}d"
    else:
        syn_str = "off"
    if syn.downtime_hours:
        syn_str += f", downtime={syn.downtime_hours}"

    report = [
        f"name: {name}",
        f"registered: {reg}",
        f"records: {len(corpus)}",
        f"config: {cfg}",
        f"policy: reindex={reindex_str}, synopsis={syn_str}",
        f"last maintained: {last}",
    ]
    if floors:
        report.append(f"min_score floors: {floors}")
    return "\n".join(report)


def maintain(name=None, *, all=False, dry_run=False):
    """Run due background work: incremental reindex, synopsis in its downtime window.

    With a name, maintains that corpus; with --all (or no name), every
    registered corpus. Idempotent and safe to schedule (cron/launchd): it
    no-ops what is not due. --dry-run reports what would run without doing it.

    Returns one line per maintenance result, or "no corpora registered" when
    the maintenance run yields no results.
    """
    # Imported here rather than at module level, so the maintenance module is
    # only loaded when this command actually runs.
    from .maintenance import maintain as run_maintenance

    outcomes = run_maintenance(name=name, all=all, dry_run=dry_run)
    if outcomes:
        return "\n".join(map(str, outcomes))
    return "no corpora registered"


def rm(name):
"""Unregister a corpus (does not delete its built data)."""
registry.unregister(name)
Expand Down Expand Up @@ -202,12 +253,12 @@ def eval(name, cases, *, mode="hybrid", k=10):


def eval_gen(name, out, *, k=5, abstention_frac=0.15, max_artifacts=None):
"""Generate an eval-case file for a corpus by back-translation (needs oa/LLM).
"""Generate an eval-case file for a corpus by back-translation (needs aix/LLM).

Writes a DiscoveryCase JSONL set (gold cases + an abstention slice) for the
registered corpus *name* to *out*, stamping a corpus-signature into the
header so the frozen file can be checked against the live corpus later. This
command calls an LLM via oa; scoring it afterwards (`ir eval`) is offline.
command calls an LLM via aix; scoring it afterwards (`ir eval`) is offline.
"""
from .eval import save_cases
from .eval_gen import build_eval_set, corpus_signature
Expand Down Expand Up @@ -375,6 +426,7 @@ def calibrate_min_score(
search,
discover,
info,
maintain,
rm,
eval,
eval_gen,
Expand Down
32 changes: 16 additions & 16 deletions ir/eval_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
The LLM is **injected** (`query_generator` / `abstention_generator` callables),
so the generation *logic* — masking, gold assignment, the leakage guard, the
abstention fraction — is fully testable with a deterministic stub and no network.
The default generators are built lazily on :mod:`oa` (`oa.prompt_function`), so
``import ir.eval_gen`` stays cheap and offline; ``oa`` is only imported when you
actually generate with the real LLM.
The default generators are built lazily on :mod:`aix` (`aix.prompt_func`, the
multi-provider LLM facade), so ``import ir.eval_gen`` stays cheap and offline;
``aix`` is only imported when you actually generate with the real LLM.

The output is plain :class:`~ir.eval.DiscoveryCase` data — freeze it with
:func:`ir.eval.save_cases` (stamping :func:`corpus_signature` into the
Expand All @@ -34,7 +34,7 @@
from ir import eval_gen as eg

source = ir.CorpusSource.from_skills()
cases = eg.build_eval_set(source, k=5, corpus_name="skills") # uses oa
cases = eg.build_eval_set(source, k=5, corpus_name="skills") # uses aix
from ir.eval import save_cases
save_cases(cases, "skills_eval.jsonl",
meta={"corpus": "skills", "corpus_signature": eg.corpus_signature(source)})
Expand Down Expand Up @@ -173,7 +173,7 @@ def _leaks_name(text: str, name: str) -> bool:


# =========================================================================== #
# Default (oa-backed) generators — lazily built, only when actually used
# Default (aix-backed) generators — lazily built, only when actually used
# =========================================================================== #


Expand All @@ -199,13 +199,13 @@ def _parse_lines(text: Any) -> list[str]:
return lines


def make_oa_query_generator(
def make_default_query_generator(
*, prompt: str = BACKTRANSLATION_PROMPT, **prompt_function_kwargs: Any
) -> QueryGenerator:
"""Build the default back-translation generator on :mod:`oa` (lazy import)."""
import oa
"""Build the default back-translation generator on :mod:`aix` (lazy import)."""
import aix

fn = oa.prompt_function(
fn = aix.prompt_func(
prompt, egress=_parse_lines, name="backtranslate", **prompt_function_kwargs
)

Expand All @@ -215,13 +215,13 @@ def generate(description: str, *, n: int) -> list[str]:
return generate


def make_oa_abstention_generator(
def make_default_abstention_generator(
*, prompt: str = ABSTENTION_PROMPT, **prompt_function_kwargs: Any
) -> AbstentionGenerator:
"""Build the default abstention generator on :mod:`oa` (lazy import)."""
import oa
"""Build the default abstention generator on :mod:`aix` (lazy import)."""
import aix

fn = oa.prompt_function(
fn = aix.prompt_func(
prompt, egress=_parse_lines, name="abstention", **prompt_function_kwargs
)

Expand Down Expand Up @@ -287,7 +287,7 @@ def generate_cases(
mask_names: scrub the artifact name from the description before
generating, and drop any generated intent that still contains it.
query_generator: ``(description, *, n) -> [intent, …]``. Defaults to the
:mod:`oa`-backed back-translator (built lazily; needs a model).
:mod:`aix`-backed back-translator (built lazily; needs a model).
describe: ``raw -> description`` (default: the ``description`` / ``text``
field, else the joined string fields).
min_chars: skip artifacts whose description is shorter than this.
Expand All @@ -304,7 +304,7 @@ def generate_cases(
"""
if k < 1:
raise ValueError(f"k must be >= 1, got {k!r}.")
gen = query_generator or make_oa_query_generator()
gen = query_generator or make_default_query_generator()
describe = describe or _default_describe
cases: list[DiscoveryCase] = []
skipped = 0
Expand Down Expand Up @@ -368,7 +368,7 @@ def generate_abstention_cases(
"""Generate ``n`` abstention cases — out-of-scope intents (empty ``gold``)."""
if n <= 0:
return []
gen = generator or make_oa_abstention_generator()
gen = generator or make_default_abstention_generator()
intents = gen(n=n, theme=theme)
cases = [
DiscoveryCase(
Expand Down
12 changes: 6 additions & 6 deletions ir/formulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
lives in the agent layer (``raglab``), not here.

:func:`make_llm_formulator` mirrors :func:`ir.select.make_llm_selector`: an
injectable ``rewriter`` callable, built lazily on :mod:`oa` when omitted (so
injectable ``rewriter`` callable, built lazily on :mod:`aix` when omitted (so
importing ir stays offline), falling back to identity on any failure — a
formulator must never make retrieval *worse* than the raw query.
"""
Expand Down Expand Up @@ -47,13 +47,13 @@ def identity_formulator(query: str) -> str:


def _default_llm_rewriter(prompt: str, n: int, **prompt_function_kwargs: Any):
"""Build the default LLM rewriter on :mod:`oa` (lazy import)."""
import oa
"""Build the default LLM rewriter on :mod:`aix` (lazy import)."""
import aix

def _parse_lines(text: str) -> list[str]:
return [line.strip(" -\t") for line in str(text).splitlines() if line.strip()]

fn = oa.prompt_function(
fn = aix.prompt_func(
prompt, egress=_parse_lines, name="formulate_queries", **prompt_function_kwargs
)

Expand All @@ -74,8 +74,8 @@ def make_llm_formulator(
"""An LLM-backed :data:`Formulator` (rewrite / expand / multi-query).

``rewriter`` is an injectable ``query -> str | [str, ...]`` callable (a test
double, or your own router); when omitted it is built lazily on :mod:`oa`
(``oa.prompt_function``), so importing this module stays offline. ``n`` is the
double, or your own router); when omitted it is built lazily on :mod:`aix`
(``aix.prompt_func``), so importing this module stays offline. ``n`` is the
multi-query fan-out width. Any error or empty reply falls back to ``fallback``
(default: :func:`identity_formulator`).
"""
Expand Down
Loading
Loading