Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions .claude/skills/mobius-judge/scripts/record_verdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
sys.path.insert(0, "src")

from mobius.config import get_config
- from mobius.db import init_db, row_to_dict
+ from mobius.db import init_db, row_to_dict, vec_to_blob
+ from mobius.embedder import embed
+ from mobius.memory import Memory
+ from mobius.models import MemoryEntry
from mobius.registry import Registry
from mobius.tournament import Tournament

Expand All @@ -39,9 +42,10 @@ def main():
reasoning = args[2]

config = get_config()
- conn, _ = init_db(config)
+ conn, vec_available = init_db(config)
registry = Registry(conn, config)
tournament = Tournament(conn, config, registry)
+ memory = Memory(conn, config, vec_available)

# Get the match
if match_id:
Expand Down Expand Up @@ -122,6 +126,21 @@ def main():

conn.commit()

+ # Store in vector memory so future selections benefit
+ task_text = match.get("task_description", "")
+ if task_text and full_winner_id:
+ try:
+ task_vec = embed(task_text, config)
Comment on lines +129 to +133
+ memory_entry = MemoryEntry(
+ task_embedding=vec_to_blob(task_vec),
+ task_text=task_text,
+ winning_agent_id=full_winner_id,
+ score=max(scores.values()) if scores else 0.0,
+ )
+ memory.store(memory_entry)
Comment on lines +133 to +140
+ except Exception as e:
+ print(f"Warning: failed to store memory entry: {e}", file=sys.stderr)

# Print results
winner = agents_by_id.get(full_winner_id)
print(f"Verdict recorded for match {mid[:8]}")
Expand Down
5 changes: 3 additions & 2 deletions src/mobius/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class MobiusConfig(BaseModel):
agent_timeout_seconds: int = 120
agent_max_turns: int = 10
agent_budget_usd: float = 0.05
+ agent_max_output_tokens: int = 16384

# Judge
judge_models: list[dict[str, str]] = [
Expand All @@ -41,8 +42,8 @@ class MobiusConfig(BaseModel):
embedding_model: str = "all-MiniLM-L6-v2"
embedding_dim: int = 384
memory_top_k: int = 5
- similarity_specialist_threshold: float = 0.9
- similarity_ensemble_threshold: float = 0.7
+ similarity_specialist_threshold: float = 0.5
+ similarity_ensemble_threshold: float = 0.3

# Self-improvement
max_agent_population: int = 50
Expand Down
13 changes: 12 additions & 1 deletion src/mobius/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,18 @@ def __init__(
self.vec_available = vec_available

def store(self, entry: MemoryEntry) -> None:
- """Store a task outcome in memory."""
+ """Store a task outcome in memory, skipping duplicates."""
+ existing = self.conn.execute(
+ "SELECT id FROM memory WHERE task_text = ? AND winning_agent_id = ?",
+ (entry.task_text, entry.winning_agent_id),
+ ).fetchone()
+ if existing:
Comment on lines +39 to +43
+ logger.debug(
+ "Duplicate memory entry for agent %s on task, skipping",
+ entry.winning_agent_id,
+ )
+ return

row = dict_to_row(entry.model_dump(exclude={"task_embedding"}))
cols = ", ".join(row.keys())
placeholders = ", ".join(["?"] * len(row))
Expand Down
4 changes: 2 additions & 2 deletions src/mobius/providers/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ async def _run_simple(
try:
response = await asyncio.wait_for(
client.messages.create(
- model=model, max_tokens=4096,
+ model=model, max_tokens=16384,
system=system_prompt,
messages=[{"role": "user", "content": prompt}],
),
Expand Down Expand Up @@ -115,7 +115,7 @@ async def _run_with_tools(
for turn in range(max_turns):
response = await asyncio.wait_for(
client.messages.create(
- model=model, max_tokens=4096,
+ model=model, max_tokens=16384,
system=system_prompt,
messages=messages,
tools=[ANTHROPIC_BASH_TOOL],
Expand Down
2 changes: 1 addition & 1 deletion src/mobius/providers/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ async def _run_with_tools(
config = types.GenerateContentConfig(
system_instruction=system_prompt,
tools=[bash_tool],
- max_output_tokens=4096,
+ max_output_tokens=16384,
)

contents = [types.Content(
Expand Down
4 changes: 2 additions & 2 deletions src/mobius/providers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ async def _run_simple(
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
],
- max_tokens=4096,
+ max_tokens=16384,
),
timeout=timeout_seconds,
)
Expand Down Expand Up @@ -110,7 +110,7 @@ async def _run_with_tools(
model=model,
messages=messages,
tools=[OPENAI_BASH_TOOL],
- max_tokens=4096,
+ max_tokens=16384,
),
timeout=timeout_seconds,
)
Expand Down
4 changes: 2 additions & 2 deletions src/mobius/providers/openrouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ async def _run_simple(
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
],
- max_tokens=4096,
+ max_tokens=16384,
),
timeout=timeout_seconds,
)
Expand Down Expand Up @@ -123,7 +123,7 @@ async def _run_with_tools(
model=model,
messages=messages,
tools=[OPENAI_BASH_TOOL],
- max_tokens=4096,
+ max_tokens=16384,
),
timeout=timeout_seconds,
)
Expand Down
Loading