From f923ce2c7ab8656612dd51230168a69f38df651d Mon Sep 17 00:00:00 2001 From: Fazil Raja Date: Thu, 21 May 2026 09:40:05 -0500 Subject: [PATCH 1/5] feat: add pre-commit CI checks --- .githooks/pre-commit | 13 +++++++++++++ package.json | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100755 .githooks/pre-commit diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..5600697 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Running pre-commit checks..." + +REPO_ROOT="$(git rev-parse --show-toplevel)" + +cd "$REPO_ROOT" && bun run check +if [ $? -ne 0 ]; then + echo "Pre-commit checks failed. Commit aborted." + exit 1 +fi + +echo "All pre-commit checks passed." diff --git a/package.json b/package.json index ec6ac33..8a00a61 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,8 @@ "typecheck": "tsc --noEmit", "lint": "bunx biome check src/", "lint:fix": "bunx biome check --write src/", - "test": "bun test" + "test": "bun test", + "prepare": "git config core.hooksPath .githooks" }, "dependencies": { "commander": "^12.0.0", From f1c1af408e304463c20a36660060f47ed457ec81 Mon Sep 17 00:00:00 2001 From: Fazil Raja Date: Thu, 21 May 2026 09:59:56 -0500 Subject: [PATCH 2/5] ci: expand git hooks --- .githooks/commit-msg | 28 ++++++++++++++++++++++++ .githooks/post-merge | 18 +++++++++++++++ .githooks/pre-commit | 18 ++++++++++----- .githooks/pre-push | 52 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 5 deletions(-) create mode 100755 .githooks/commit-msg create mode 100755 .githooks/post-merge create mode 100755 .githooks/pre-push diff --git a/.githooks/commit-msg b/.githooks/commit-msg new file mode 100755 index 0000000..6d19a4f --- /dev/null +++ b/.githooks/commit-msg @@ -0,0 +1,28 @@ +#!/bin/sh +set -eu + +commit_msg_file="$1" +first_line="$(sed -n '1p' "$commit_msg_file")" + +case "$first_line" in + Merge\ *|Revert\ *|fixup!\ *|squash!\ *) + exit 0 + 
;; +esac + +if printf '%s\n' "$first_line" | grep -Eq '^(feat|fix|docs|test|chore|refactor|perf|ci|build)(\([A-Za-z0-9._-]+\))?!?: .+'; then + exit 0 +fi + +cat >&2 <<'EOF' +Invalid commit message. + +Use Conventional Commits: + feat: add evaluator retry coverage + fix(cli): handle missing settings file + docs: update harness guide + +Allowed types: feat, fix, docs, test, chore, refactor, perf, ci, build +EOF + +exit 1 diff --git a/.githooks/post-merge b/.githooks/post-merge new file mode 100755 index 0000000..e99fee1 --- /dev/null +++ b/.githooks/post-merge @@ -0,0 +1,18 @@ +#!/bin/sh +set -eu + +REPO_ROOT="$(git rev-parse --show-toplevel)" +cd "$REPO_ROOT" + +changed_files="$(git diff --name-only ORIG_HEAD..HEAD 2>/dev/null || true)" + +if printf '%s\n' "$changed_files" | grep -Eq '^(package\.json|bun\.lock)$'; then + echo "Root dependencies changed; running bun install..." + bun install +fi + +if printf '%s\n' "$changed_files" | grep -Eq '^web/(package\.json|bun\.lock)$'; then + echo "Web dependencies changed; running bun install in web/..." + cd "$REPO_ROOT/web" + bun install +fi diff --git a/.githooks/pre-commit b/.githooks/pre-commit index 5600697..2a866f0 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -1,13 +1,21 @@ #!/bin/sh +set -eu echo "Running pre-commit checks..." REPO_ROOT="$(git rev-parse --show-toplevel)" +cd "$REPO_ROOT" -cd "$REPO_ROOT" && bun run check -if [ $? -ne 0 ]; then - echo "Pre-commit checks failed. Commit aborted." - exit 1 -fi +echo "==> Git whitespace checks" +git diff --cached --check + +echo "==> Biome staged-file checks" +bunx biome check --staged --files-ignore-unknown=true --no-errors-on-unmatched + +echo "==> TypeScript typecheck" +bun run typecheck + +echo "==> Unit tests" +bun test echo "All pre-commit checks passed." 
diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 0000000..967cdda --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,52 @@ +#!/bin/sh +set -eu + +echo "Running pre-push checks..." + +REPO_ROOT="$(git rev-parse --show-toplevel)" +cd "$REPO_ROOT" + +changed_files="" + +while read -r local_ref local_sha remote_ref remote_sha; do + case "$local_sha" in + 0000000000000000000000000000000000000000) + continue + ;; + esac + + case "$remote_sha" in + 0000000000000000000000000000000000000000) + range="$(git merge-base origin/main "$local_sha")..$local_sha" + ;; + *) + range="$remote_sha..$local_sha" + ;; + esac + + changed_files="$changed_files +$(git diff --name-only "$range")" +done + +if [ -z "$(printf '%s' "$changed_files" | tr -d '[:space:]')" ]; then + upstream="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || true)" + if [ -n "$upstream" ]; then + changed_files="$(git diff --name-only "$upstream"..HEAD)" + else + changed_files="$(git diff --name-only origin/main..HEAD)" + fi +fi + +echo "==> Root checks" +bun run check + +if printf '%s\n' "$changed_files" | grep -q '^web/'; then + echo "==> Web build" + cd "$REPO_ROOT/web" + bun install + bun run build +else + echo "No web changes detected; skipping web build." +fi + +echo "All pre-push checks passed." 
From a943fea0a8d939fa0a257df2a983ac3e55fcaa58 Mon Sep 17 00:00:00 2001 From: Fazil Raja Date: Thu, 21 May 2026 10:04:32 -0500 Subject: [PATCH 3/5] fix: make web build pass --- src/lib/token-ledger.ts | 49 +++++++++++++++++---------- web/app/api/agents/[id]/route.ts | 3 +- web/app/api/criteria/enhance/route.ts | 3 +- web/app/api/generate/route.ts | 3 +- web/app/api/runs/route.ts | 9 +++-- web/app/page.tsx | 2 +- web/components/CaseEditor.tsx | 1 + web/next.config.js | 5 ++- web/tsconfig.json | 4 ++- 9 files changed, 53 insertions(+), 26 deletions(-) diff --git a/src/lib/token-ledger.ts b/src/lib/token-ledger.ts index 483cb2b..c51f78f 100644 --- a/src/lib/token-ledger.ts +++ b/src/lib/token-ledger.ts @@ -12,7 +12,6 @@ */ import { eq } from 'drizzle-orm' -import { db } from '../db/index' import { tokenUsage } from '../db/schema' import { generateId } from './id' @@ -30,6 +29,11 @@ export interface TokenUsageEntry { let _context: { runId?: string; caseId?: string } = {} +type TokenUsageRow = typeof tokenUsage.$inferInsert +type TokenUsageRecorder = (entry: TokenUsageRow) => void | Promise<void> + +let _recorder: TokenUsageRecorder | undefined + export function setLedgerContext(ctx: { runId?: string; caseId?: string }) { _context = { ..._context, ...ctx } } @@ -38,6 +42,10 @@ export function clearLedgerContext() { _context = {} } +export function setTokenUsageRecorder(recorder: TokenUsageRecorder | undefined) { + _recorder = recorder +} + function estimateTokens(chars: number): number { return Math.ceil(chars / 4) } @@ -47,25 +55,32 @@ export function recordTokenUsage(entry: TokenUsageEntry): void { const responseEst = estimateTokens(entry.responseChars) // Fire-and-forget — don't block the eval pipeline - db.insert(tokenUsage) - .values({ - id: generateId(), - runId: entry.runId || _context.runId || null, - caseId: entry.caseId || _context.caseId || null, - scope: entry.scope, - model: entry.model, - promptTokensEst: promptEst, - responseTokensEst: responseEst, - 
totalTokensEst: promptEst + responseEst, - latencyMs: entry.latencyMs, - status: entry.status, - error: entry.error || null, - timestamp: new Date(), - }) - .catch(() => {}) + const row = { + id: generateId(), + runId: entry.runId || _context.runId || null, + caseId: entry.caseId || _context.caseId || null, + scope: entry.scope, + model: entry.model, + promptTokensEst: promptEst, + responseTokensEst: responseEst, + totalTokensEst: promptEst + responseEst, + latencyMs: entry.latencyMs, + status: entry.status, + error: entry.error || null, + timestamp: new Date(), + } + + const recorder = _recorder || recordWithCliDb + Promise.resolve(recorder(row)).catch(() => {}) +} + +async function recordWithCliDb(entry: TokenUsageRow) { + const { db } = await import(/* webpackIgnore: true */ '../db/index') + await db.insert(tokenUsage).values(entry) } export async function getRunTokenUsage(runId: string) { + const { db } = await import(/* webpackIgnore: true */ '../db/index') return db.select().from(tokenUsage).where(eq(tokenUsage.runId, runId)) } diff --git a/web/app/api/agents/[id]/route.ts b/web/app/api/agents/[id]/route.ts index 2813a63..7877cb5 100644 --- a/web/app/api/agents/[id]/route.ts +++ b/web/app/api/agents/[id]/route.ts @@ -1,6 +1,6 @@ import { NextResponse } from 'next/server' import { fetchAgentInfo } from '../../../../../src/lib/fetch-agent' -import { config } from '../../../../../src/lib/config' +import { getConfig } from '../../../../../src/lib/config' export async function GET( _request: Request, @@ -19,6 +19,7 @@ export async function GET( // Also fetch the schema for snapshot storage let schema = null try { + const config = getConfig() const schemaResp = await fetch( `${config.gleanBackend}/rest/api/v1/agents/${params.id}/schemas`, { headers: { 'Authorization': `Bearer ${config.gleanApiKey}` } } diff --git a/web/app/api/criteria/enhance/route.ts b/web/app/api/criteria/enhance/route.ts index 7f2ac9b..ca42f60 100644 --- a/web/app/api/criteria/enhance/route.ts 
+++ b/web/app/api/criteria/enhance/route.ts @@ -1,5 +1,5 @@ import { NextResponse } from 'next/server' -import { config } from '../../../../../src/lib/config' +import { getConfig } from '../../../../../src/lib/config' import { extractContentWithFallback, type GleanResponse } from '../../../../../src/lib/extract-content' export async function POST(request: Request) { @@ -61,6 +61,7 @@ Evaluate information density, not length." Now write the rubric for "${name}". Be specific to this dimension — don't write a generic rubric. Output ONLY the rubric text, no preamble.` + const config = getConfig() const resp = await fetch(`${config.gleanBackend}/rest/api/v1/chat`, { method: 'POST', headers: { diff --git a/web/app/api/generate/route.ts b/web/app/api/generate/route.ts index 2f53dae..747306b 100644 --- a/web/app/api/generate/route.ts +++ b/web/app/api/generate/route.ts @@ -1,7 +1,7 @@ import { NextResponse } from 'next/server' import { smartGenerate } from '../../../../src/lib/generate-agent' import { fetchAgentInfo } from '../../../../src/lib/fetch-agent' -import { config } from '../../../../src/lib/config' +import { getConfig } from '../../../../src/lib/config' export async function POST(request: Request) { try { @@ -17,6 +17,7 @@ export async function POST(request: Request) { // Fetch agent info (name + description) const agentInfo = await fetchAgentInfo(agentId) + const config = getConfig() // Fetch agent schema const schemaResp = await fetch( diff --git a/web/app/api/runs/route.ts b/web/app/api/runs/route.ts index 78b76e0..99357b4 100644 --- a/web/app/api/runs/route.ts +++ b/web/app/api/runs/route.ts @@ -1,5 +1,5 @@ import { NextResponse } from 'next/server' -import { db, evalSets, evalCases, evalCriteria, evalRuns, evalResults, evalScores } from '@/lib/db' +import { db, evalSets, evalCases, evalCriteria, evalRuns, evalResults, evalScores, tokenUsage } from '@/lib/db' import { eq, inArray } from 'drizzle-orm' // Import from CLI code @@ -8,9 +8,11 @@ import { 
judgeResponseBatch, JUDGE_MODELS } from '../../../../src/lib/judge' import { getCriterion } from '../../../../src/criteria/defaults' import { calculateOverallScore } from '../../../../src/lib/score' import { generateId } from '../../../../src/lib/id' -import { setLedgerContext } from '../../../../src/lib/token-ledger' +import { setLedgerContext, setTokenUsageRecorder } from '../../../../src/lib/token-ledger' import type { CriterionDefinition } from '../../../../src/criteria/defaults' +setTokenUsageRecorder((entry) => db.insert(tokenUsage).values(entry)) + export async function POST(request: Request) { try { const body = await request.json() @@ -103,7 +105,7 @@ export async function POST(request: Request) { }) // Process cases (async - don't block response) - processCases(runId, set.agentId, cases, criteriaObjs, judges, multiTurn, maxTurns, agentType, set.agentPrompt || undefined, set.simulatorPrompt || undefined, (set.simulatorAgentType as 'advanced' | 'default') || 'default', safetyPolicy || undefined).catch(err => { + processCases(runId, set.agentId, cases, criteriaObjs, judges, evalSetMode as 'guidance' | 'golden', multiTurn, maxTurns, agentType, set.agentPrompt || undefined, set.simulatorPrompt || undefined, (set.simulatorAgentType as 'advanced' | 'default') || 'default', safetyPolicy || undefined).catch(err => { console.error(`Run ${runId} failed:`, err) db.update(evalRuns).set({ status: 'failed', completedAt: new Date() }).where(eq(evalRuns.id, runId)).catch(console.error) }) @@ -162,6 +164,7 @@ async function processCases( cases: Array<{ id: string; query: string; evalGuidance?: string | null; expectedOutput?: string | null; metadata?: string | null }>, criteria: CriterionDefinition[], judgeModelIds: string[], + evalSetMode: 'guidance' | 'golden', multiTurn: boolean = false, maxTurns: number = 5, agentType: string = 'workflow', diff --git a/web/app/page.tsx b/web/app/page.tsx index 4afe7e9..7888e4c 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -7,7 
+7,7 @@ export const dynamic = 'force-dynamic' interface EvalSetWithStats { id: string name: string - description: string + description: string | null agentId: string agentType: string | null createdAt: Date diff --git a/web/components/CaseEditor.tsx b/web/components/CaseEditor.tsx index 84f5f9f..9e0aded 100644 --- a/web/components/CaseEditor.tsx +++ b/web/components/CaseEditor.tsx @@ -7,6 +7,7 @@ interface EvalCase { query: string evalGuidance: string | null context: string | null + createdAt: Date } interface CaseEditorProps { diff --git a/web/next.config.js b/web/next.config.js index 921cbc8..d407e5e 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -3,7 +3,10 @@ const nextConfig = { reactStrictMode: true, // Enable webpack caching for faster builds webpack: (config) => { - // Bun SQLite is handled natively + config.resolve.alias = { + ...config.resolve.alias, + 'drizzle-orm': require.resolve('drizzle-orm'), + } return config }, } diff --git a/web/tsconfig.json b/web/tsconfig.json index d8b9323..658bbab 100644 --- a/web/tsconfig.json +++ b/web/tsconfig.json @@ -19,7 +19,9 @@ } ], "paths": { - "@/*": ["./*"] + "@/*": ["./*"], + "drizzle-orm": ["../node_modules/drizzle-orm"], + "drizzle-orm/*": ["../node_modules/drizzle-orm/*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], From 6adb37088661a6774e4fb4ad168e7035d8316e93 Mon Sep 17 00:00:00 2001 From: Fazil Raja Date: Thu, 21 May 2026 11:55:37 -0500 Subject: [PATCH 4/5] fix: initialize web database --- web/lib/db.ts | 162 ++++++++++++++++++++++++++++++++++++++++++++- web/next.config.js | 4 +- 2 files changed, 164 insertions(+), 2 deletions(-) diff --git a/web/lib/db.ts b/web/lib/db.ts index eb05e4e..1674598 100644 --- a/web/lib/db.ts +++ b/web/lib/db.ts @@ -1,14 +1,174 @@ import { drizzle } from 'drizzle-orm/better-sqlite3' import Database from 'better-sqlite3' +import { existsSync, mkdirSync } from 'fs' import { join } from 'path' +import { DEFAULT_CRITERIA } from 
'../../src/criteria/defaults' import * as schema from '../../src/db/schema' // Point to same database as CLI // In Next.js, process.cwd() is the web/ directory -const dbPath = join(process.cwd(), '..', 'data', 'seer.db') +const dataDir = join(process.cwd(), '..', 'data') +if (!existsSync(dataDir)) { + mkdirSync(dataDir, { recursive: true }) +} + +const dbPath = join(dataDir, 'seer.db') const sqlite = new Database(dbPath) +initializeSchema(sqlite) + export const db = drizzle(sqlite, { schema }) // Re-export schema for convenience export * from '../../src/db/schema' + +function initializeSchema(database: Database.Database) { + database.exec(` + CREATE TABLE IF NOT EXISTS eval_sets ( + id TEXT PRIMARY KEY NOT NULL, + name TEXT NOT NULL, + description TEXT, + agent_id TEXT NOT NULL, + agent_schema TEXT, + agent_type TEXT, + agent_prompt TEXT, + simulator_prompt TEXT, + simulator_agent_type TEXT, + mode TEXT NOT NULL DEFAULT 'guidance', + created_at INTEGER NOT NULL + ); + + CREATE TABLE IF NOT EXISTS eval_cases ( + id TEXT PRIMARY KEY NOT NULL, + eval_set_id TEXT NOT NULL REFERENCES eval_sets(id), + query TEXT NOT NULL, + eval_guidance TEXT, + expected_output TEXT, + context TEXT, + metadata TEXT, + created_at INTEGER NOT NULL + ); + + CREATE TABLE IF NOT EXISTS eval_criteria ( + id TEXT PRIMARY KEY NOT NULL, + name TEXT NOT NULL, + description TEXT, + rubric TEXT NOT NULL, + score_type TEXT NOT NULL, + scale_config TEXT, + weight REAL NOT NULL DEFAULT 1.0, + is_default INTEGER NOT NULL DEFAULT 0 + ); + + CREATE TABLE IF NOT EXISTS eval_runs ( + id TEXT PRIMARY KEY NOT NULL, + eval_set_id TEXT NOT NULL REFERENCES eval_sets(id), + started_at INTEGER NOT NULL, + completed_at INTEGER, + status TEXT NOT NULL, + config TEXT + ); + + CREATE TABLE IF NOT EXISTS eval_results ( + id TEXT PRIMARY KEY NOT NULL, + run_id TEXT NOT NULL REFERENCES eval_runs(id), + case_id TEXT NOT NULL REFERENCES eval_cases(id), + agent_response TEXT NOT NULL, + agent_trace TEXT, + transcript TEXT, 
+ latency_ms INTEGER NOT NULL, + total_tokens INTEGER, + tool_calls TEXT, + overall_score REAL NOT NULL, + timestamp INTEGER NOT NULL + ); + + CREATE TABLE IF NOT EXISTS token_usage ( + id TEXT PRIMARY KEY NOT NULL, + run_id TEXT REFERENCES eval_runs(id), + case_id TEXT, + scope TEXT NOT NULL, + model TEXT NOT NULL, + prompt_tokens_est INTEGER, + response_tokens_est INTEGER, + total_tokens_est INTEGER, + latency_ms INTEGER, + status TEXT NOT NULL, + error TEXT, + timestamp INTEGER NOT NULL + ); + + CREATE TABLE IF NOT EXISTS eval_scores ( + id TEXT PRIMARY KEY NOT NULL, + result_id TEXT NOT NULL REFERENCES eval_results(id), + criterion_id TEXT NOT NULL REFERENCES eval_criteria(id), + score_value REAL, + score_category TEXT, + reasoning TEXT NOT NULL, + judge_model TEXT, + ensemble_run_id TEXT, + timestamp INTEGER NOT NULL + ); + `) + + runMigration(database, 'ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance') + for (const statement of [ + 'ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT', + 'ALTER TABLE eval_results ADD COLUMN agent_trace TEXT', + 'ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT', + 'ALTER TABLE eval_sets ADD COLUMN simulator_prompt TEXT', + 'ALTER TABLE eval_sets ADD COLUMN simulator_agent_type TEXT', + "ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'", + 'ALTER TABLE eval_cases ADD COLUMN expected_output TEXT', + ]) { + runMigration(database, statement) + } + + seedDefaultCriteria(database) +} + +function runMigration(database: Database.Database, statement: string) { + try { + database.exec(statement) + } catch { + // Column/table already migrated, or legacy source column does not exist. 
+ } +} + +function seedDefaultCriteria(database: Database.Database) { + const existingRows = database.prepare('SELECT id FROM eval_criteria').all() as Array<{ id: string }> + const existingIds = new Set(existingRows.map((row) => row.id)) + const missingCriteria = DEFAULT_CRITERIA.filter((criterion) => !existingIds.has(criterion.id)) + + if (missingCriteria.length === 0) return + + const insert = database.prepare(` + INSERT INTO eval_criteria ( + id, + name, + description, + rubric, + score_type, + scale_config, + weight, + is_default + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + `) + + const insertMany = database.transaction(() => { + for (const criterion of missingCriteria) { + insert.run( + criterion.id, + criterion.name, + criterion.description || '', + criterion.rubric, + criterion.scoreType, + JSON.stringify(criterion.scaleConfig || {}), + criterion.weight, + 1, + ) + } + }) + + insertMany() +} diff --git a/web/next.config.js b/web/next.config.js index d407e5e..2c5fd5e 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -1,11 +1,13 @@ /** @type {import('next').NextConfig} */ +const path = require('path') + const nextConfig = { reactStrictMode: true, // Enable webpack caching for faster builds webpack: (config) => { config.resolve.alias = { ...config.resolve.alias, - 'drizzle-orm': require.resolve('drizzle-orm'), + 'drizzle-orm': path.resolve(__dirname, '../node_modules/drizzle-orm'), } return config }, From ab3960472a3ee0c7acb9d544fc8fe8c0be9f2972 Mon Sep 17 00:00:00 2001 From: Fazil Raja Date: Thu, 21 May 2026 12:10:35 -0500 Subject: [PATCH 5/5] fix: share web database bootstrap --- src/db/bootstrap.ts | 93 ++++++++++++++++++++++++++++++ src/db/index.ts | 108 ++--------------------------------- src/db/migrate.ts | 33 ++--------- web/lib/db.ts | 134 +++----------------------------------------- web/next.config.js | 10 ---- 5 files changed, 112 insertions(+), 266 deletions(-) create mode 100644 src/db/bootstrap.ts diff --git a/src/db/bootstrap.ts 
b/src/db/bootstrap.ts new file mode 100644 index 0000000..5c1c773 --- /dev/null +++ b/src/db/bootstrap.ts @@ -0,0 +1,93 @@ +import { existsSync, mkdirSync, readFileSync } from 'fs' +import { join } from 'path' +import { DEFAULT_CRITERIA } from '../criteria/defaults' + +type RunStatement = (statement: string) => void + +export interface DefaultCriterionRow { + id: string + name: string + description: string + rubric: string + scoreType: 'binary' | 'categorical' | 'metric' + scaleConfig: string + weight: number + isDefault: boolean +} + +export function ensureDataDir(repoRoot: string): string { + const dataDir = join(repoRoot, 'data') + if (!existsSync(dataDir)) { + mkdirSync(dataDir, { recursive: true }) + } + return dataDir +} + +export function applySchemaMigrations(run: RunStatement, repoRoot: string) { + const migrationPath = join(repoRoot, 'src/db/migrations/0000_tough_harry_osborn.sql') + if (existsSync(migrationPath)) { + const sql = readFileSync(migrationPath, 'utf-8') + for (const statement of sql.split(';').filter((s) => s.trim())) { + runIgnoringExpectedErrors(run, statement) + } + } + + for (const statement of [ + 'ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance', + 'ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT', + 'ALTER TABLE eval_results ADD COLUMN agent_trace TEXT', + 'ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT', + 'ALTER TABLE eval_sets ADD COLUMN simulator_prompt TEXT', + 'ALTER TABLE eval_sets ADD COLUMN simulator_agent_type TEXT', + "ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'", + 'ALTER TABLE eval_cases ADD COLUMN expected_output TEXT', + `CREATE TABLE IF NOT EXISTS token_usage ( + id TEXT PRIMARY KEY NOT NULL, + run_id TEXT REFERENCES eval_runs(id), + case_id TEXT, + scope TEXT NOT NULL, + model TEXT NOT NULL, + prompt_tokens_est INTEGER, + response_tokens_est INTEGER, + total_tokens_est INTEGER, + latency_ms INTEGER, + status TEXT NOT NULL, + error TEXT, + timestamp INTEGER NOT 
NULL + )`, + ]) { + runIgnoringExpectedErrors(run, statement) + } +} + +export function defaultCriterionRows(existingIds: Set<string>): DefaultCriterionRow[] { + return DEFAULT_CRITERIA.filter((criterion) => !existingIds.has(criterion.id)).map((criterion) => ({ + id: criterion.id, + name: criterion.name, + description: criterion.description || '', + rubric: criterion.rubric, + scoreType: criterion.scoreType, + scaleConfig: JSON.stringify(criterion.scaleConfig || {}), + weight: criterion.weight, + isDefault: true, + })) +} + +function runIgnoringExpectedErrors(run: RunStatement, statement: string) { + try { + run(statement) + } catch (error) { + if (isExpectedMigrationError(error)) return + throw error + } +} + +function isExpectedMigrationError(error: unknown): boolean { + const message = String(error) + return ( + message.includes('already exists') || + message.includes('duplicate column name') || + message.includes('no such column: "expected_answer"') || + message.includes('no such column: expected_answer') + ) +} diff --git a/src/db/index.ts b/src/db/index.ts index 9269e8a..eeeae21 100644 --- a/src/db/index.ts +++ b/src/db/index.ts @@ -4,17 +4,12 @@ import { Database } from 'bun:sqlite' import { drizzle } from 'drizzle-orm/bun-sqlite' -import { existsSync, mkdirSync } from 'fs' import { join } from 'path' +import { applySchemaMigrations, defaultCriterionRows, ensureDataDir } from './bootstrap' import * as schema from './schema' -// Ensure data directory exists -const dataDir = join(process.cwd(), 'data') -if (!existsSync(dataDir)) { - mkdirSync(dataDir, { recursive: true }) -} - // Initialize SQLite connection +const dataDir = ensureDataDir(process.cwd()) const sqlite = new Database(join(dataDir, 'seer.db')) export const db = drizzle(sqlite, { schema }) @@ -24,112 +19,21 @@ export const db = drizzle(sqlite, { schema }) export async function initializeDB() { console.log('Initializing database...') - // Import seed function - const { seedDefaultCriteria } = await 
import('./seed') - - // One-time migration: rename expected_answer → eval_guidance - try { - sqlite.run('ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance') - console.log('✓ Migrated: expected_answer → eval_guidance') - } catch { - // Column already renamed or doesn't exist — expected after first run - } - - // One-time migration: add agent_schema to eval_sets - try { - sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT') - console.log('✓ Added: eval_sets.agent_schema') - } catch { - // Column already exists — expected after first run - } - - // One-time migration: add agent_trace to eval_results - try { - sqlite.run('ALTER TABLE eval_results ADD COLUMN agent_trace TEXT') - console.log('✓ Added: eval_results.agent_trace') - } catch { - // Column already exists — expected after first run - } - - // One-time migration: add agent_prompt to eval_sets - try { - sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT') - console.log('✓ Added: eval_sets.agent_prompt') - } catch { - // Column already exists — expected after first run - } - - // One-time migration: add simulator_prompt and simulator_agent_type to eval_sets - for (const col of ['simulator_prompt', 'simulator_agent_type']) { - try { - sqlite.run(`ALTER TABLE eval_sets ADD COLUMN ${col} TEXT`) - console.log(`✓ Added: eval_sets.${col}`) - } catch { - // Column already exists - } - } - - // v0.2.0: add mode to eval_sets - try { - sqlite.run("ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'") - console.log('Added: eval_sets.mode') - } catch { - // Column already exists - } - - // v0.2.0: add expected_output to eval_cases - try { - sqlite.run('ALTER TABLE eval_cases ADD COLUMN expected_output TEXT') - console.log('Added: eval_cases.expected_output') - } catch { - // Column already exists - } - - // v0.2.0: create token_usage table - try { - sqlite.run(`CREATE TABLE IF NOT EXISTS token_usage ( - id TEXT PRIMARY KEY NOT NULL, - run_id TEXT REFERENCES 
eval_runs(id), - case_id TEXT, - scope TEXT NOT NULL, - model TEXT NOT NULL, - prompt_tokens_est INTEGER, - response_tokens_est INTEGER, - total_tokens_est INTEGER, - latency_ms INTEGER, - status TEXT NOT NULL, - error TEXT, - timestamp INTEGER NOT NULL - )`) - } catch { - // Table already exists - } + applySchemaMigrations((statement) => sqlite.run(statement), process.cwd()) // Check if default criteria already exist const existing = await db.select().from(schema.evalCriteria) if (existing.length === 0) { console.log('Seeding default criteria...') - await seedDefaultCriteria() + await db.insert(schema.evalCriteria).values(defaultCriterionRows(new Set())) console.log('✓ Default criteria seeded') } else { // Ensure new default criteria are added (e.g., instruction_following) - const { DEFAULT_CRITERIA } = await import('../criteria/defaults') const existingIds = new Set(existing.map((c) => c.id)) - const missingCriteria = DEFAULT_CRITERIA.filter((c) => !existingIds.has(c.id)) + const missingCriteria = defaultCriterionRows(existingIds) if (missingCriteria.length > 0) { - await db.insert(schema.evalCriteria).values( - missingCriteria.map((c) => ({ - id: c.id, - name: c.name, - description: c.description || '', - rubric: c.rubric, - scoreType: c.scoreType, - scaleConfig: JSON.stringify(c.scaleConfig || {}), - weight: c.weight, - isDefault: true, - })), - ) + await db.insert(schema.evalCriteria).values(missingCriteria) console.log( `✓ Added ${missingCriteria.length} new default criteria: ${missingCriteria.map((c) => c.id).join(', ')}`, ) diff --git a/src/db/migrate.ts b/src/db/migrate.ts index 9596402..caaa8a5 100644 --- a/src/db/migrate.ts +++ b/src/db/migrate.ts @@ -3,38 +3,13 @@ */ import { Database } from 'bun:sqlite' -import { existsSync, mkdirSync, readFileSync } from 'fs' import { join } from 'path' +import { applySchemaMigrations, ensureDataDir } from './bootstrap' -const dataDir = join(process.cwd(), 'data') -if (!existsSync(dataDir)) { - mkdirSync(dataDir, { 
recursive: true }) -} - +const dataDir = ensureDataDir(process.cwd()) const db = new Database(join(dataDir, 'seer.db')) -// Read and execute the migration SQL -const migrationPath = join(process.cwd(), 'src/db/migrations/0000_tough_harry_osborn.sql') - -if (existsSync(migrationPath)) { - const sql = readFileSync(migrationPath, 'utf-8') - - // Execute each statement - const statements = sql.split(';').filter((s) => s.trim()) - for (const statement of statements) { - try { - db.run(statement) - } catch (error) { - // Ignore errors for already existing tables - if (!String(error).includes('already exists')) { - console.error('Migration error:', error) - } - } - } - - console.log('✓ Database migrations applied') -} else { - console.log('No migration file found, skipping...') -} +applySchemaMigrations((statement) => db.run(statement), process.cwd()) +console.log('✓ Database migrations applied') db.close() diff --git a/web/lib/db.ts b/web/lib/db.ts index 1674598..279b75d 100644 --- a/web/lib/db.ts +++ b/web/lib/db.ts @@ -1,144 +1,28 @@ import { drizzle } from 'drizzle-orm/better-sqlite3' import Database from 'better-sqlite3' -import { existsSync, mkdirSync } from 'fs' import { join } from 'path' -import { DEFAULT_CRITERIA } from '../../src/criteria/defaults' +import { applySchemaMigrations, defaultCriterionRows, ensureDataDir } from '../../src/db/bootstrap' import * as schema from '../../src/db/schema' // Point to same database as CLI // In Next.js, process.cwd() is the web/ directory -const dataDir = join(process.cwd(), '..', 'data') -if (!existsSync(dataDir)) { - mkdirSync(dataDir, { recursive: true }) -} - +const repoRoot = join(process.cwd(), '..') +const dataDir = ensureDataDir(repoRoot) const dbPath = join(dataDir, 'seer.db') const sqlite = new Database(dbPath) -initializeSchema(sqlite) +applySchemaMigrations((statement) => sqlite.exec(statement), repoRoot) export const db = drizzle(sqlite, { schema }) +seedDefaultCriteria(sqlite) // Re-export schema for 
convenience export * from '../../src/db/schema' -function initializeSchema(database: Database.Database) { - database.exec(` - CREATE TABLE IF NOT EXISTS eval_sets ( - id TEXT PRIMARY KEY NOT NULL, - name TEXT NOT NULL, - description TEXT, - agent_id TEXT NOT NULL, - agent_schema TEXT, - agent_type TEXT, - agent_prompt TEXT, - simulator_prompt TEXT, - simulator_agent_type TEXT, - mode TEXT NOT NULL DEFAULT 'guidance', - created_at INTEGER NOT NULL - ); - - CREATE TABLE IF NOT EXISTS eval_cases ( - id TEXT PRIMARY KEY NOT NULL, - eval_set_id TEXT NOT NULL REFERENCES eval_sets(id), - query TEXT NOT NULL, - eval_guidance TEXT, - expected_output TEXT, - context TEXT, - metadata TEXT, - created_at INTEGER NOT NULL - ); - - CREATE TABLE IF NOT EXISTS eval_criteria ( - id TEXT PRIMARY KEY NOT NULL, - name TEXT NOT NULL, - description TEXT, - rubric TEXT NOT NULL, - score_type TEXT NOT NULL, - scale_config TEXT, - weight REAL NOT NULL DEFAULT 1.0, - is_default INTEGER NOT NULL DEFAULT 0 - ); - - CREATE TABLE IF NOT EXISTS eval_runs ( - id TEXT PRIMARY KEY NOT NULL, - eval_set_id TEXT NOT NULL REFERENCES eval_sets(id), - started_at INTEGER NOT NULL, - completed_at INTEGER, - status TEXT NOT NULL, - config TEXT - ); - - CREATE TABLE IF NOT EXISTS eval_results ( - id TEXT PRIMARY KEY NOT NULL, - run_id TEXT NOT NULL REFERENCES eval_runs(id), - case_id TEXT NOT NULL REFERENCES eval_cases(id), - agent_response TEXT NOT NULL, - agent_trace TEXT, - transcript TEXT, - latency_ms INTEGER NOT NULL, - total_tokens INTEGER, - tool_calls TEXT, - overall_score REAL NOT NULL, - timestamp INTEGER NOT NULL - ); - - CREATE TABLE IF NOT EXISTS token_usage ( - id TEXT PRIMARY KEY NOT NULL, - run_id TEXT REFERENCES eval_runs(id), - case_id TEXT, - scope TEXT NOT NULL, - model TEXT NOT NULL, - prompt_tokens_est INTEGER, - response_tokens_est INTEGER, - total_tokens_est INTEGER, - latency_ms INTEGER, - status TEXT NOT NULL, - error TEXT, - timestamp INTEGER NOT NULL - ); - - CREATE TABLE IF NOT 
EXISTS eval_scores ( - id TEXT PRIMARY KEY NOT NULL, - result_id TEXT NOT NULL REFERENCES eval_results(id), - criterion_id TEXT NOT NULL REFERENCES eval_criteria(id), - score_value REAL, - score_category TEXT, - reasoning TEXT NOT NULL, - judge_model TEXT, - ensemble_run_id TEXT, - timestamp INTEGER NOT NULL - ); - `) - - runMigration(database, 'ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance') - for (const statement of [ - 'ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT', - 'ALTER TABLE eval_results ADD COLUMN agent_trace TEXT', - 'ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT', - 'ALTER TABLE eval_sets ADD COLUMN simulator_prompt TEXT', - 'ALTER TABLE eval_sets ADD COLUMN simulator_agent_type TEXT', - "ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'", - 'ALTER TABLE eval_cases ADD COLUMN expected_output TEXT', - ]) { - runMigration(database, statement) - } - - seedDefaultCriteria(database) -} - -function runMigration(database: Database.Database, statement: string) { - try { - database.exec(statement) - } catch { - // Column/table already migrated, or legacy source column does not exist. 
- } -} - function seedDefaultCriteria(database: Database.Database) { const existingRows = database.prepare('SELECT id FROM eval_criteria').all() as Array<{ id: string }> const existingIds = new Set(existingRows.map((row) => row.id)) - const missingCriteria = DEFAULT_CRITERIA.filter((criterion) => !existingIds.has(criterion.id)) + const missingCriteria = defaultCriterionRows(existingIds) if (missingCriteria.length === 0) return @@ -160,12 +44,12 @@ function seedDefaultCriteria(database: Database.Database) { insert.run( criterion.id, criterion.name, - criterion.description || '', + criterion.description, criterion.rubric, criterion.scoreType, - JSON.stringify(criterion.scaleConfig || {}), + criterion.scaleConfig, criterion.weight, - 1, + Number(criterion.isDefault), ) } }) diff --git a/web/next.config.js b/web/next.config.js index 2c5fd5e..a843cbe 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -1,16 +1,6 @@ /** @type {import('next').NextConfig} */ -const path = require('path') - const nextConfig = { reactStrictMode: true, - // Enable webpack caching for faster builds - webpack: (config) => { - config.resolve.alias = { - ...config.resolve.alias, - 'drizzle-orm': path.resolve(__dirname, '../node_modules/drizzle-orm'), - } - return config - }, } module.exports = nextConfig