Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,12 @@ GEMINI_API_KEY=

# Max iterations before stopping (default: 100)
# OODA_MAX_ITERATIONS=100

# ── Executive Harness ────────────────────────────────────────────────────
# Coding agent CLI for spawning autoresearch instances
# Options: claude-code, codex, or leave unset for manual mode
# AGENT_COMMAND=claude-code

# Autoresearch Hub URL (for agenthub multi-agent mode)
# AUTORESEARCH_HUB_URL=https://autoresearchhub.com
# AUTORESEARCH_API_KEY=
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
node_modules/
.env
_ooda/
.executive/
*.db
*.db-journal
.DS_Store
autoresearch/
results.tsv
run.log
305 changes: 157 additions & 148 deletions README.md

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions examples/executive.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// executive.config.js — Example executive harness configuration
//
// This example shows how to orchestrate multiple autoresearch instances
// toward business goals. The executive decomposes goals into research
// tracks, spawns autoresearch agents, and manages the portfolio.

module.exports = {
// ── Identity ──────────────────────────────────────────────────────────
name: 'LLM Training Optimization',
domainContext: 'Optimizing a small GPT model across multiple dimensions simultaneously',

// ── Business goals ────────────────────────────────────────────────────
// The executive translates these into research tracks.
// Each goal has a metric, current value, target, and weight.
goals: [
{
id: 'model-quality',
description: 'Improve model quality (val_bpb)',
metric: 'val_bpb',
current: 0.998,
target: 0.950,
weight: 0.5,
direction: 'lower',
},
{
id: 'memory-efficiency',
description: 'Reduce GPU memory usage',
metric: 'peak_vram_gb',
current: 12.0,
target: 8.0,
weight: 0.3,
direction: 'lower',
},
{
id: 'training-speed',
description: 'Maximize tokens processed per second',
metric: 'mfu_percent',
current: 30,
target: 50,
weight: 0.2,
direction: 'higher',
},
],

// ── Pre-defined research tracks (optional) ────────────────────────────
// If omitted, the executive uses LLM to decompose goals into tracks.
// Pre-defining gives you explicit control over what gets researched.
tracks: [
{
id: 'arch-depth',
name: 'Architecture Depth Exploration',
description: 'Find optimal model depth vs width tradeoff for val_bpb',
approach: 'Systematically vary num_layers and d_model in train.py. Try deeper-narrower and shallower-wider configurations.',
metric: 'val_bpb',
metricDirection: 'lower',
relatedGoals: ['model-quality'],
priority: 'critical',
// These are autoresearch-native: the agent modifies train.py
ml: true,
},
{
id: 'optimizer-tuning',
name: 'Optimizer & LR Schedule',
description: 'Optimize learning rate schedule, warmup, and optimizer hyperparameters',
approach: 'Explore Muon vs AdamW settings, warmup schedules, weight decay on different parameter groups.',
metric: 'val_bpb',
metricDirection: 'lower',
relatedGoals: ['model-quality', 'training-speed'],
priority: 'high',
ml: true,
},
{
id: 'memory-opts',
name: 'Memory Optimization',
description: 'Reduce peak VRAM without hurting val_bpb',
approach: 'Try gradient checkpointing, mixed precision tweaks, batch size reduction with gradient accumulation.',
metric: 'peak_vram_mb',
metricDirection: 'lower',
relatedGoals: ['memory-efficiency'],
priority: 'high',
ml: true,
},
{
id: 'throughput',
name: 'Training Throughput',
description: 'Maximize MFU (model FLOPs utilization)',
approach: 'Optimize data loading, compile settings, kernel fusion, batch sizing for hardware utilization.',
metric: 'mfu_percent',
metricDirection: 'higher',
relatedGoals: ['training-speed'],
priority: 'medium',
ml: true,
},
],

// ── Coding agent command ──────────────────────────────────────────────
// Which CLI to use for spawning autoresearch agents.
// Each instance gets its own program.md and git branch.
//
// Options:
// 'claude-code' — Anthropic's Claude Code CLI
// 'codex' — OpenAI's Codex CLI
// null — Manual mode (prints instructions, you run agents yourself)
agentCommand: null, // Set to 'claude-code' or 'codex' for automatic agent spawning

// ── Executive settings ────────────────────────────────────────────────
maxConcurrentInstances: 4, // How many autoresearch agents run simultaneously
intervalMinutes: 15, // How often the executive reviews the portfolio
maxCycles: 50, // Max executive review cycles
experimentBudgetMinutes: 5, // Per-experiment time budget (autoresearch default)

// ── Paths ─────────────────────────────────────────────────────────────
// workDir: '/path/to/workspace', // Base directory for autoresearch clones
// hubDir: './.executive/_hub', // Where the hub stores state
};
87 changes: 87 additions & 0 deletions examples/executive.generalized.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// executive.generalized.config.js — Example: Non-ML autoresearch
//
// Shows how the executive harness generalizes autoresearch beyond ML.
// The autoresearch PROTOCOL (modify file → run → measure → keep/discard)
// works for any optimization problem. The executive orchestrates multiple
// such loops toward business goals.

module.exports = {
name: 'Full-Stack Product Optimization',
domainContext: 'Optimizing a SaaS product across performance, UX, and conversion',

goals: [
{
id: 'api-latency',
description: 'Reduce API p95 latency',
metric: 'p95_ms',
current: 450,
target: 100,
weight: 0.3,
direction: 'lower',
},
{
id: 'lighthouse',
description: 'Maximize Lighthouse performance score',
metric: 'lighthouse_score',
current: 62,
target: 95,
weight: 0.3,
direction: 'higher',
},
{
id: 'conversion',
description: 'Improve signup conversion rate',
metric: 'conversion_pct',
current: 2.1,
target: 5.0,
weight: 0.4,
direction: 'higher',
},
],

tracks: [
{
id: 'api-perf',
name: 'API Performance',
description: 'Optimize API response times by modifying server configuration and query patterns',
// Generalized autoresearch: specify the target file and eval command
targetFile: 'src/server/config.ts',
evalCommand: 'npm run bench:api | grep p95',
metric: 'p95_ms',
metricDirection: 'lower',
relatedGoals: ['api-latency'],
priority: 'high',
constraints: [
'Do not change the API contract (request/response shapes)',
'Keep all existing tests passing',
],
},
{
id: 'frontend-perf',
name: 'Frontend Performance',
description: 'Improve Lighthouse score by optimizing bundle size, rendering, and asset loading',
targetFile: 'src/app/layout.tsx',
evalCommand: 'npm run lighthouse -- --output=json | node -e "const r=JSON.parse(require(\'fs\').readFileSync(\'/dev/stdin\',\'utf8\')); console.log(\'lighthouse_score:\', r.categories.performance.score * 100)"',
metric: 'lighthouse_score',
metricDirection: 'higher',
relatedGoals: ['lighthouse'],
priority: 'high',
},
{
id: 'landing-copy',
name: 'Landing Page Copy',
description: 'Optimize landing page copy and CTA placement for conversion',
targetFile: 'src/app/page.tsx',
evalCommand: 'npm run test:conversion-proxy',
metric: 'conversion_pct',
metricDirection: 'higher',
relatedGoals: ['conversion'],
priority: 'critical',
},
],

agentCommand: null,
maxConcurrentInstances: 3,
intervalMinutes: 20,
maxCycles: 30,
};
Loading