Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .githooks/commit-msg
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh
# commit-msg hook: accept the commit only when the first line of the message
# follows the Conventional Commits format, or is one of the exempt subjects
# (Merge, Revert, fixup!, squash!).
set -eu

# Git passes the path of the file holding the proposed message as $1.
commit_msg_file="$1"
subject="$(sed -n '1p' "$commit_msg_file")"

# Succeeds for subjects that are exempt from the format check.
is_exempt() {
  case "$1" in
    Merge\ *|Revert\ *|fixup!\ *|squash!\ *)
      return 0
      ;;
  esac
  return 1
}

# Succeeds for "<type>[(scope)][!]: <description>" with an allowed type.
is_conventional() {
  printf '%s\n' "$1" | grep -Eq '^(feat|fix|docs|test|chore|refactor|perf|ci|build)(\([A-Za-z0-9._-]+\))?!?: .+'
}

if is_exempt "$subject" || is_conventional "$subject"; then
  exit 0
fi

# Anything else is rejected with a usage hint on stderr.
cat >&2 <<'EOF'
Invalid commit message.

Use Conventional Commits:
feat: add evaluator retry coverage
fix(cli): handle missing settings file
docs: update harness guide

Allowed types: feat, fix, docs, test, chore, refactor, perf, ci, build
EOF

exit 1
18 changes: 18 additions & 0 deletions .githooks/post-merge
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/sh
# post-merge hook: re-run `bun install` when the merge changed a dependency
# manifest or lockfile, at the repository root and/or under web/.
set -eu

REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"

# Files touched by the merge. If the diff cannot be computed the list is left
# empty, so neither install below runs.
merged_files="$(git diff --name-only ORIG_HEAD..HEAD 2>/dev/null || true)"

# Succeeds when any merged path matches the extended regex given as $1.
merged_matches() {
  printf '%s\n' "$merged_files" | grep -Eq "$1"
}

if merged_matches '^(package\.json|bun\.lock)$'; then
  echo "Root dependencies changed; running bun install..."
  bun install
fi

if merged_matches '^web/(package\.json|bun\.lock)$'; then
  echo "Web dependencies changed; running bun install in web/..."
  cd "$REPO_ROOT/web"
  bun install
fi
21 changes: 21 additions & 0 deletions .githooks/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh
# pre-commit hook: whitespace check on the staged diff, Biome on staged files,
# then the project typecheck and unit tests. `set -e` aborts the commit at the
# first failing step.
set -eu

# Print a "==> <label>" banner, then run the remaining arguments as a command.
step() {
  banner="$1"
  shift
  echo "==> $banner"
  "$@"
}

echo "Running pre-commit checks..."

REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"

step "Git whitespace checks" git diff --cached --check
step "Biome staged-file checks" bunx biome check --staged --files-ignore-unknown=true --no-errors-on-unmatched
step "TypeScript typecheck" bun run typecheck
step "Unit tests" bun test

echo "All pre-commit checks passed."
52 changes: 52 additions & 0 deletions .githooks/pre-push
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/sh
# pre-push hook: always runs the root checks, and additionally installs and
# builds web/ when the commits being pushed touch files under web/.
set -eu

echo "Running pre-push checks..."

REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"

ZERO_SHA=0000000000000000000000000000000000000000
changed_files=""

# Git writes one line per pushed ref to stdin:
#   <local ref> <local sha> <remote ref> <remote sha>
while read -r local_ref local_sha remote_ref remote_sha; do
  # An all-zero local sha means the ref is being deleted: nothing to diff.
  if [ "$local_sha" = "$ZERO_SHA" ]; then
    continue
  fi

  # Prefer the remote tip as the diff base, but only when that commit is
  # actually present locally. It is absent for a brand-new remote ref (all
  # zeros) and can be absent when force-pushing over commits that were never
  # fetched; diffing against it would fail and, under `set -e`, abort the push.
  if [ "$remote_sha" != "$ZERO_SHA" ] && git cat-file -e "$remote_sha^{commit}" 2>/dev/null; then
    base="$remote_sha"
  else
    # Fall back to where this ref diverged from origin/main. Left empty when
    # origin/main is missing or there is no common ancestor.
    base="$(git merge-base origin/main "$local_sha" 2>/dev/null || true)"
  fi

  if [ -n "$base" ]; then
    range_files="$(git diff --name-only "$base..$local_sha")"
  else
    # No usable base: be conservative and behave as if web/ changed so the
    # web build is not skipped by mistake.
    range_files="web/"
  fi

  # The continuation line must start at column 0 so paths keep matching '^web/'.
  changed_files="$changed_files
$range_files"
done

# No ref lines on stdin (or nothing changed): fall back to diffing against the
# upstream branch, or origin/main when no upstream is configured.
if [ -z "$(printf '%s' "$changed_files" | tr -d '[:space:]')" ]; then
  upstream="$(git rev-parse --abbrev-ref --symbolic-full-name '@{u}' 2>/dev/null || true)"
  if [ -n "$upstream" ]; then
    changed_files="$(git diff --name-only "$upstream"..HEAD)"
  else
    changed_files="$(git diff --name-only origin/main..HEAD)"
  fi
fi

echo "==> Root checks"
bun run check

if printf '%s\n' "$changed_files" | grep -q '^web/'; then
  echo "==> Web build"
  cd "$REPO_ROOT/web"
  bun install
  bun run build
else
  echo "No web changes detected; skipping web build."
fi

echo "All pre-push checks passed."
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"typecheck": "tsc --noEmit",
"lint": "bunx biome check src/",
"lint:fix": "bunx biome check --write src/",
"test": "bun test"
"test": "bun test",
"prepare": "git config core.hooksPath .githooks"
},
"dependencies": {
"commander": "^12.0.0",
Expand Down
93 changes: 93 additions & 0 deletions src/db/bootstrap.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { existsSync, mkdirSync, readFileSync } from 'fs'
import { join } from 'path'
import { DEFAULT_CRITERIA } from '../criteria/defaults'

/** Callback that executes a single SQL statement string against the database. */
type RunStatement = (statement: string) => void

/** Row shape produced by `defaultCriterionRows` for a built-in evaluation criterion. */
export interface DefaultCriterionRow {
id: string
name: string
// Empty string when the source criterion has no description.
description: string
rubric: string
scoreType: 'binary' | 'categorical' | 'metric'
// JSON-serialized scale configuration (`'{}'` when the criterion defines none).
scaleConfig: string
weight: number
// Always `true` for rows built by `defaultCriterionRows`.
isDefault: boolean
}

/**
 * Makes sure the `data` directory under `repoRoot` exists and returns its path.
 *
 * `mkdirSync` with `recursive: true` creates any missing parents and is a
 * no-op when the directory is already there, so no separate existence check
 * is needed (which also avoids a check-then-create race).
 *
 * @param repoRoot - Directory under which `data/` is created.
 * @returns The path of the `data` directory (`<repoRoot>/data`).
 * @throws If the directory cannot be created, e.g. the path exists but is not a directory.
 */
export function ensureDataDir(repoRoot: string): string {
  const dataDir = join(repoRoot, 'data')
  mkdirSync(dataDir, { recursive: true })
  return dataDir
}

/**
 * Brings the database schema up to date by issuing statements through `run`.
 *
 * First, if the baseline migration file exists under `repoRoot`, its contents
 * are split on `;` and every non-blank chunk is executed. Then a fixed list of
 * follow-up `ALTER TABLE` / `CREATE TABLE IF NOT EXISTS` statements is
 * executed. Each statement goes through `runIgnoringExpectedErrors`, so
 * failures it classifies as expected are skipped and anything else propagates.
 *
 * @param run - Executes one SQL statement.
 * @param repoRoot - Root directory used to locate the baseline migration file.
 */
export function applySchemaMigrations(run: RunStatement, repoRoot: string) {
  const baselinePath = join(repoRoot, 'src/db/migrations/0000_tough_harry_osborn.sql')
  if (existsSync(baselinePath)) {
    // NOTE: a plain split on ';' would also cut a statement that contains a
    // semicolon inside a literal.
    readFileSync(baselinePath, 'utf-8')
      .split(';')
      .filter((chunk) => chunk.trim())
      .forEach((chunk) => runIgnoringExpectedErrors(run, chunk))
  }

  // Incremental changes layered on top of the baseline, applied in this order.
  const followUpStatements = [
    'ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance',
    'ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT',
    'ALTER TABLE eval_results ADD COLUMN agent_trace TEXT',
    'ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT',
    'ALTER TABLE eval_sets ADD COLUMN simulator_prompt TEXT',
    'ALTER TABLE eval_sets ADD COLUMN simulator_agent_type TEXT',
    "ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'",
    'ALTER TABLE eval_cases ADD COLUMN expected_output TEXT',
    `CREATE TABLE IF NOT EXISTS token_usage (
id TEXT PRIMARY KEY NOT NULL,
run_id TEXT REFERENCES eval_runs(id),
case_id TEXT,
scope TEXT NOT NULL,
model TEXT NOT NULL,
prompt_tokens_est INTEGER,
response_tokens_est INTEGER,
total_tokens_est INTEGER,
latency_ms INTEGER,
status TEXT NOT NULL,
error TEXT,
timestamp INTEGER NOT NULL
)`,
  ]
  followUpStatements.forEach((sqlStatement) => runIgnoringExpectedErrors(run, sqlStatement))
}

/**
 * Builds rows for every entry of `DEFAULT_CRITERIA` whose id is not already
 * in `existingIds`.
 *
 * A falsy description becomes `''`, a falsy scale config is serialized as
 * `'{}'`, and every row is flagged `isDefault: true`.
 *
 * @param existingIds - Ids of criteria that should be left out of the result.
 * @returns One row per missing default criterion, in `DEFAULT_CRITERIA` order.
 */
export function defaultCriterionRows(existingIds: Set<string>): DefaultCriterionRow[] {
  const rows: DefaultCriterionRow[] = []
  for (const criterion of DEFAULT_CRITERIA) {
    if (existingIds.has(criterion.id)) continue
    rows.push({
      id: criterion.id,
      name: criterion.name,
      description: criterion.description || '',
      rubric: criterion.rubric,
      scoreType: criterion.scoreType,
      scaleConfig: JSON.stringify(criterion.scaleConfig || {}),
      weight: criterion.weight,
      isDefault: true,
    })
  }
  return rows
}

/**
 * Executes `statement` via `run`, suppressing only failures that
 * `isExpectedMigrationError` recognizes; any other error is rethrown as-is.
 */
function runIgnoringExpectedErrors(run: RunStatement, statement: string) {
  try {
    run(statement)
  } catch (failure) {
    if (!isExpectedMigrationError(failure)) {
      throw failure
    }
  }
}

/**
 * Reports whether `error`, once stringified, contains one of the message
 * fragments treated as harmless when re-running migrations: an object that
 * already exists, a duplicate column, or a missing `expected_answer` column.
 *
 * @param error - The thrown value; it is converted with `String()` before matching.
 * @returns `true` when the stringified error contains any known fragment.
 */
function isExpectedMigrationError(error: unknown): boolean {
  const text = String(error)
  const benignFragments = [
    'already exists',
    'duplicate column name',
    'no such column: "expected_answer"',
    'no such column: expected_answer',
  ]
  return benignFragments.some((fragment) => text.includes(fragment))
}
108 changes: 6 additions & 102 deletions src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,12 @@

import { Database } from 'bun:sqlite'
import { drizzle } from 'drizzle-orm/bun-sqlite'
import { existsSync, mkdirSync } from 'fs'
import { join } from 'path'
import { applySchemaMigrations, defaultCriterionRows, ensureDataDir } from './bootstrap'
import * as schema from './schema'

// Ensure data directory exists
const dataDir = join(process.cwd(), 'data')
if (!existsSync(dataDir)) {
mkdirSync(dataDir, { recursive: true })
}

// Initialize SQLite connection
const dataDir = ensureDataDir(process.cwd())
const sqlite = new Database(join(dataDir, 'seer.db'))
export const db = drizzle(sqlite, { schema })

Expand All @@ -24,112 +19,21 @@ export const db = drizzle(sqlite, { schema })
export async function initializeDB() {
console.log('Initializing database...')

// Import seed function
const { seedDefaultCriteria } = await import('./seed')

// One-time migration: rename expected_answer → eval_guidance
try {
sqlite.run('ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance')
console.log('✓ Migrated: expected_answer → eval_guidance')
} catch {
// Column already renamed or doesn't exist — expected after first run
}

// One-time migration: add agent_schema to eval_sets
try {
sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT')
console.log('✓ Added: eval_sets.agent_schema')
} catch {
// Column already exists — expected after first run
}

// One-time migration: add agent_trace to eval_results
try {
sqlite.run('ALTER TABLE eval_results ADD COLUMN agent_trace TEXT')
console.log('✓ Added: eval_results.agent_trace')
} catch {
// Column already exists — expected after first run
}

// One-time migration: add agent_prompt to eval_sets
try {
sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT')
console.log('✓ Added: eval_sets.agent_prompt')
} catch {
// Column already exists — expected after first run
}

// One-time migration: add simulator_prompt and simulator_agent_type to eval_sets
for (const col of ['simulator_prompt', 'simulator_agent_type']) {
try {
sqlite.run(`ALTER TABLE eval_sets ADD COLUMN ${col} TEXT`)
console.log(`✓ Added: eval_sets.${col}`)
} catch {
// Column already exists
}
}

// v0.2.0: add mode to eval_sets
try {
sqlite.run("ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'")
console.log('Added: eval_sets.mode')
} catch {
// Column already exists
}

// v0.2.0: add expected_output to eval_cases
try {
sqlite.run('ALTER TABLE eval_cases ADD COLUMN expected_output TEXT')
console.log('Added: eval_cases.expected_output')
} catch {
// Column already exists
}

// v0.2.0: create token_usage table
try {
sqlite.run(`CREATE TABLE IF NOT EXISTS token_usage (
id TEXT PRIMARY KEY NOT NULL,
run_id TEXT REFERENCES eval_runs(id),
case_id TEXT,
scope TEXT NOT NULL,
model TEXT NOT NULL,
prompt_tokens_est INTEGER,
response_tokens_est INTEGER,
total_tokens_est INTEGER,
latency_ms INTEGER,
status TEXT NOT NULL,
error TEXT,
timestamp INTEGER NOT NULL
)`)
} catch {
// Table already exists
}
applySchemaMigrations((statement) => sqlite.run(statement), process.cwd())

// Check if default criteria already exist
const existing = await db.select().from(schema.evalCriteria)

if (existing.length === 0) {
console.log('Seeding default criteria...')
await seedDefaultCriteria()
await db.insert(schema.evalCriteria).values(defaultCriterionRows(new Set()))
console.log('✓ Default criteria seeded')
} else {
// Ensure new default criteria are added (e.g., instruction_following)
const { DEFAULT_CRITERIA } = await import('../criteria/defaults')
const existingIds = new Set(existing.map((c) => c.id))
const missingCriteria = DEFAULT_CRITERIA.filter((c) => !existingIds.has(c.id))
const missingCriteria = defaultCriterionRows(existingIds)
if (missingCriteria.length > 0) {
await db.insert(schema.evalCriteria).values(
missingCriteria.map((c) => ({
id: c.id,
name: c.name,
description: c.description || '',
rubric: c.rubric,
scoreType: c.scoreType,
scaleConfig: JSON.stringify(c.scaleConfig || {}),
weight: c.weight,
isDefault: true,
})),
)
await db.insert(schema.evalCriteria).values(missingCriteria)
console.log(
`✓ Added ${missingCriteria.length} new default criteria: ${missingCriteria.map((c) => c.id).join(', ')}`,
)
Expand Down
Loading
Loading