askscio · ken-cavanagh-glean · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/.githooks/commit-msg b/.githooks/commit-msg
@@ -0,0 +1,28 @@
+#!/bin/sh
+set -eu
+
+commit_msg_file="$1"
+first_line="$(sed -n '1p' "$commit_msg_file")"
+
+case "$first_line" in
+  Merge\ *|Revert\ *|fixup!\ *|squash!\ *)
+    exit 0
+    ;;
+esac
+
+if printf '%s\n' "$first_line" | grep -Eq '^(feat|fix|docs|test|chore|refactor|perf|ci|build)(\([A-Za-z0-9._-]+\))?!?: .+'; then
+  exit 0
+fi
+
+cat >&2 <<'EOF'
+Invalid commit message.
+
+Use Conventional Commits:
+  feat: add evaluator retry coverage
+  fix(cli): handle missing settings file
+  docs: update harness guide
+
+Allowed types: feat, fix, docs, test, chore, refactor, perf, ci, build
+EOF
+
+exit 1
diff --git a/.githooks/post-merge b/.githooks/post-merge
@@ -0,0 +1,18 @@
+#!/bin/sh
+# post-merge hook: re-run `bun install` when the merge changed a manifest or
+# lockfile at the repository root and/or under web/.
+set -eu
+
+repo_root="$(git rev-parse --show-toplevel)"
+cd "$repo_root"
+
+# Paths that differ between ORIG_HEAD and HEAD. The lookup is best-effort: if
+# the diff fails, the list stays empty and neither install runs.
+merged_paths="$(git diff --name-only ORIG_HEAD..HEAD 2>/dev/null || true)"
+
+# touched PATTERN — succeeds when some merged path matches the extended regex.
+touched() {
+  printf '%s\n' "$merged_paths" | grep -Eq "$1"
+}
+
+if touched '^(package\.json|bun\.lock)$'; then
+  echo "Root dependencies changed; running bun install..."
+  bun install
+fi
+
+if touched '^web/(package\.json|bun\.lock)$'; then
+  echo "Web dependencies changed; running bun install in web/..."
+  cd "$repo_root/web"
+  bun install
+fi
diff --git a/.githooks/pre-commit b/.githooks/pre-commit
@@ -0,0 +1,21 @@
+#!/bin/sh
+set -eu
+
+echo "Running pre-commit checks..."
+
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+cd "$REPO_ROOT"
+
+echo "==> Git whitespace checks"
+git diff --cached --check
+
+echo "==> Biome staged-file checks"
+bunx biome check --staged --files-ignore-unknown=true --no-errors-on-unmatched
+
+echo "==> TypeScript typecheck"
+bun run typecheck
+
+echo "==> Unit tests"
+bun test
+
+echo "All pre-commit checks passed."
diff --git a/.githooks/pre-push b/.githooks/pre-push
@@ -0,0 +1,52 @@
+#!/bin/sh
+set -eu
+
+echo "Running pre-push checks..."
+
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+cd "$REPO_ROOT"
+
+changed_files=""
+
+while read -r local_ref local_sha remote_ref remote_sha; do
+  case "$local_sha" in
+    0000000000000000000000000000000000000000)
+      continue
+      ;;
+  esac
+
+  case "$remote_sha" in
+    0000000000000000000000000000000000000000)
+      range="$(git merge-base origin/main "$local_sha")..$local_sha"
+      ;;
+    *)
+      range="$remote_sha..$local_sha"
+      ;;
+  esac
+
+  changed_files="$changed_files
+$(git diff --name-only "$range")"
+done
+
+if [ -z "$(printf '%s' "$changed_files" | tr -d '[:space:]')" ]; then
+  upstream="$(git rev-parse --abbrev-ref --symbolic-full-name @{u} 2>/dev/null || true)"
+  if [ -n "$upstream" ]; then
+    changed_files="$(git diff --name-only "$upstream"..HEAD)"
+  else
+    changed_files="$(git diff --name-only origin/main..HEAD)"
+  fi
+fi
+
+echo "==> Root checks"
+bun run check
+
+if printf '%s\n' "$changed_files" | grep -q '^web/'; then
+  echo "==> Web build"
+  cd "$REPO_ROOT/web"
+  bun install
+  bun run build
+else
+  echo "No web changes detected; skipping web build."
+fi
+
+echo "All pre-push checks passed."
diff --git a/package.json b/package.json
@@ -12,7 +12,8 @@
     "typecheck": "tsc --noEmit",
     "lint": "bunx biome check src/",
     "lint:fix": "bunx biome check --write src/",
-    "test": "bun test"
+    "test": "bun test",
+    "prepare": "git config core.hooksPath .githooks"
   },
   "dependencies": {
     "commander": "^12.0.0",

diff --git a/src/db/bootstrap.ts b/src/db/bootstrap.ts
@@ -0,0 +1,93 @@
+import { existsSync, mkdirSync, readFileSync } from 'fs'
+import { join } from 'path'
+import { DEFAULT_CRITERIA } from '../criteria/defaults'
+
+/** Callback that executes a single SQL statement; passed in by the caller of `applySchemaMigrations`. */
+type RunStatement = (statement: string) => void
+
+/**
+ * Shape of one default-criterion record as returned by `defaultCriterionRows`.
+ */
+export interface DefaultCriterionRow {
+  id: string
+  name: string
+  /** `defaultCriterionRows` substitutes an empty string when the source description is falsy. */
+  description: string
+  rubric: string
+  scoreType: 'binary' | 'categorical' | 'metric'
+  /** JSON text: `defaultCriterionRows` stores the `JSON.stringify` of the criterion's scale config (or of `{}`). */
+  scaleConfig: string
+  weight: number
+  /** `defaultCriterionRows` always sets this to `true`. */
+  isDefault: boolean
+}
+
+/**
+ * Makes sure `<repoRoot>/data` exists.
+ *
+ * @param repoRoot - Directory under which the `data` folder lives.
+ * @returns The path of the `data` directory.
+ */
+export function ensureDataDir(repoRoot: string): string {
+  const target = join(repoRoot, 'data')
+  if (existsSync(target)) {
+    return target
+  }
+  mkdirSync(target, { recursive: true })
+  return target
+}
+
+/**
+ * Sends the schema DDL through `run`, one statement at a time.
+ *
+ * The baseline migration file under `repoRoot` is replayed first (skipped when
+ * the file is absent): its text is split on `;` and blank fragments are
+ * dropped. The follow-up statements listed in the body are issued afterwards.
+ * Every statement goes through `runIgnoringExpectedErrors`, so errors that
+ * `isExpectedMigrationError` recognises do not abort the sequence; any other
+ * error propagates to the caller.
+ *
+ * @param run - Executes one SQL statement.
+ * @param repoRoot - Directory that contains `src/db/migrations`.
+ */
+export function applySchemaMigrations(run: RunStatement, repoRoot: string) {
+  const baselinePath = join(repoRoot, 'src/db/migrations/0000_tough_harry_osborn.sql')
+  if (existsSync(baselinePath)) {
+    const fragments = readFileSync(baselinePath, 'utf-8')
+      .split(';')
+      .filter((fragment) => fragment.trim())
+    for (const fragment of fragments) {
+      runIgnoringExpectedErrors(run, fragment)
+    }
+  }
+
+  // Follow-up changes, issued in this order after the baseline.
+  const followUps = [
+    'ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance',
+    'ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT',
+    'ALTER TABLE eval_results ADD COLUMN agent_trace TEXT',
+    'ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT',
+    'ALTER TABLE eval_sets ADD COLUMN simulator_prompt TEXT',
+    'ALTER TABLE eval_sets ADD COLUMN simulator_agent_type TEXT',
+    "ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'",
+    'ALTER TABLE eval_cases ADD COLUMN expected_output TEXT',
+    `CREATE TABLE IF NOT EXISTS token_usage (
+      id TEXT PRIMARY KEY NOT NULL,
+      run_id TEXT REFERENCES eval_runs(id),
+      case_id TEXT,
+      scope TEXT NOT NULL,
+      model TEXT NOT NULL,
+      prompt_tokens_est INTEGER,
+      response_tokens_est INTEGER,
+      total_tokens_est INTEGER,
+      latency_ms INTEGER,
+      status TEXT NOT NULL,
+      error TEXT,
+      timestamp INTEGER NOT NULL
+    )`,
+  ]
+  for (const ddl of followUps) {
+    runIgnoringExpectedErrors(run, ddl)
+  }
+}
+
+/**
+ * Builds rows for every entry of `DEFAULT_CRITERIA` whose id is not already
+ * present.
+ *
+ * @param existingIds - Ids to leave out of the result.
+ * @returns One row per remaining criterion, in `DEFAULT_CRITERIA` order, each
+ *   flagged `isDefault: true`.
+ */
+export function defaultCriterionRows(existingIds: Set<string>): DefaultCriterionRow[] {
+  const rows: DefaultCriterionRow[] = []
+  for (const candidate of DEFAULT_CRITERIA) {
+    if (existingIds.has(candidate.id)) continue
+    rows.push({
+      id: candidate.id,
+      name: candidate.name,
+      description: candidate.description || '',
+      rubric: candidate.rubric,
+      scoreType: candidate.scoreType,
+      // The scale config is stored as JSON text; a missing one becomes "{}".
+      scaleConfig: JSON.stringify(candidate.scaleConfig || {}),
+      weight: candidate.weight,
+      isDefault: true,
+    })
+  }
+  return rows
+}
+
+/**
+ * Runs one statement, swallowing only the failures that
+ * `isExpectedMigrationError` recognises; anything else is rethrown unchanged.
+ */
+function runIgnoringExpectedErrors(run: RunStatement, statement: string) {
+  try {
+    run(statement)
+  } catch (caught) {
+    if (!isExpectedMigrationError(caught)) {
+      throw caught
+    }
+  }
+}
+
+/**
+ * Reports whether a thrown value's string form contains one of the tolerated
+ * message fragments. Presumably these are the messages the database reports
+ * when a table/column already exists or the `expected_answer` rename has
+ * already happened — verify against the driver in use.
+ */
+function isExpectedMigrationError(error: unknown): boolean {
+  const toleratedFragments = [
+    'already exists',
+    'duplicate column name',
+    'no such column: "expected_answer"',
+    'no such column: expected_answer',
+  ]
+  const text = String(error)
+  return toleratedFragments.some((fragment) => text.includes(fragment))
+}
diff --git a/src/db/index.ts b/src/db/index.ts
@@ -4,17 +4,12 @@
 
 import { Database } from 'bun:sqlite'
 import { drizzle } from 'drizzle-orm/bun-sqlite'
-import { existsSync, mkdirSync } from 'fs'
 import { join } from 'path'
+import { applySchemaMigrations, defaultCriterionRows, ensureDataDir } from './bootstrap'
 import * as schema from './schema'
 
-// Ensure data directory exists
-const dataDir = join(process.cwd(), 'data')
-if (!existsSync(dataDir)) {
-  mkdirSync(dataDir, { recursive: true })
-}
-
 // Initialize SQLite connection
+const dataDir = ensureDataDir(process.cwd())
 const sqlite = new Database(join(dataDir, 'seer.db'))
 export const db = drizzle(sqlite, { schema })
 
@@ -24,112 +19,21 @@ export const db = drizzle(sqlite, { schema })
 export async function initializeDB() {
   console.log('Initializing database...')
 
-  // Import seed function
-  const { seedDefaultCriteria } = await import('./seed')
-
-  // One-time migration: rename expected_answer → eval_guidance
-  try {
-    sqlite.run('ALTER TABLE eval_cases RENAME COLUMN expected_answer TO eval_guidance')
-    console.log('✓ Migrated: expected_answer → eval_guidance')
-  } catch {
-    // Column already renamed or doesn't exist — expected after first run
-  }
-
-  // One-time migration: add agent_schema to eval_sets
-  try {
-    sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_schema TEXT')
-    console.log('✓ Added: eval_sets.agent_schema')
-  } catch {
-    // Column already exists — expected after first run
-  }
-
-  // One-time migration: add agent_trace to eval_results
-  try {
-    sqlite.run('ALTER TABLE eval_results ADD COLUMN agent_trace TEXT')
-    console.log('✓ Added: eval_results.agent_trace')
-  } catch {
-    // Column already exists — expected after first run
-  }
-
-  // One-time migration: add agent_prompt to eval_sets
-  try {
-    sqlite.run('ALTER TABLE eval_sets ADD COLUMN agent_prompt TEXT')
-    console.log('✓ Added: eval_sets.agent_prompt')
-  } catch {
-    // Column already exists — expected after first run
-  }
-
-  // One-time migration: add simulator_prompt and simulator_agent_type to eval_sets
-  for (const col of ['simulator_prompt', 'simulator_agent_type']) {
-    try {
-      sqlite.run(`ALTER TABLE eval_sets ADD COLUMN ${col} TEXT`)
-      console.log(`✓ Added: eval_sets.${col}`)
-    } catch {
-      // Column already exists
-    }
-  }
-
-  // v0.2.0: add mode to eval_sets
-  try {
-    sqlite.run("ALTER TABLE eval_sets ADD COLUMN mode TEXT NOT NULL DEFAULT 'guidance'")
-    console.log('Added: eval_sets.mode')
-  } catch {
-    // Column already exists
-  }
-
-  // v0.2.0: add expected_output to eval_cases
-  try {
-    sqlite.run('ALTER TABLE eval_cases ADD COLUMN expected_output TEXT')
-    console.log('Added: eval_cases.expected_output')
-  } catch {
-    // Column already exists
-  }
-
-  // v0.2.0: create token_usage table
-  try {
-    sqlite.run(`CREATE TABLE IF NOT EXISTS token_usage (
-      id TEXT PRIMARY KEY NOT NULL,
-      run_id TEXT REFERENCES eval_runs(id),
-      case_id TEXT,
-      scope TEXT NOT NULL,
-      model TEXT NOT NULL,
-      prompt_tokens_est INTEGER,
-      response_tokens_est INTEGER,
-      total_tokens_est INTEGER,
-      latency_ms INTEGER,
-      status TEXT NOT NULL,
-      error TEXT,
-      timestamp INTEGER NOT NULL
-    )`)
-  } catch {
-    // Table already exists
-  }
+  applySchemaMigrations((statement) => sqlite.run(statement), process.cwd())
 
   // Check if default criteria already exist
   const existing = await db.select().from(schema.evalCriteria)
 
   if (existing.length === 0) {
     console.log('Seeding default criteria...')
-    await seedDefaultCriteria()
+    await db.insert(schema.evalCriteria).values(defaultCriterionRows(new Set()))
     console.log('✓ Default criteria seeded')
   } else {
     // Ensure new default criteria are added (e.g., instruction_following)
-    const { DEFAULT_CRITERIA } = await import('../criteria/defaults')
     const existingIds = new Set(existing.map((c) => c.id))
-    const missingCriteria = DEFAULT_CRITERIA.filter((c) => !existingIds.has(c.id))
+    const missingCriteria = defaultCriterionRows(existingIds)
     if (missingCriteria.length > 0) {
-      await db.insert(schema.evalCriteria).values(
-        missingCriteria.map((c) => ({
-          id: c.id,
-          name: c.name,
-          description: c.description || '',
-          rubric: c.rubric,
-          scoreType: c.scoreType,
-          scaleConfig: JSON.stringify(c.scaleConfig || {}),
-          weight: c.weight,
-          isDefault: true,
-        })),
-      )
+      await db.insert(schema.evalCriteria).values(missingCriteria)
       console.log(
         `✓ Added ${missingCriteria.length} new default criteria: ${missingCriteria.map((c) => c.id).join(', ')}`,
       )