/**
 * Unit tests for the behavior-checker helper: spec discovery/loading and
 * each assertion type, plus smoke checks of the shipped specs against
 * hand-written realistic outputs.
 */
describe('behavior-checker', () => {
  type SpecAssertion = BehaviorSpec['assertions'][number];

  // Wraps a single assertion in a throwaway spec so each case stays short.
  const specFor = (assertion: SpecAssertion): BehaviorSpec => ({
    skill: 'test',
    assertions: [assertion],
  });

  // Assertions shared by the passing and failing variant of the same case.
  const noNitpick: SpecAssertion = {
    type: 'pattern_absent',
    pattern: 'variable name',
    regex: true,
    case_insensitive: true,
    description: 'no nitpick',
  };
  const threeSections: SpecAssertion = {
    type: 'min_sections',
    heading_level: 2,
    min_count: 3,
    description: 'enough sections',
  };

  test('listBehaviorSpecs returns all spec files', () => {
    const available = listBehaviorSpecs();
    const expectedNames = ['review', 'retro', 'qa', 'ship', 'plan-ceo-review', 'plan-eng-review'];
    for (const expectedName of expectedNames) {
      expect(available).toContain(expectedName);
    }
    expect(available.length).toBe(6);
  });

  test('loadBehaviorSpec returns null for nonexistent skill', () => {
    const missing = loadBehaviorSpec('nonexistent-skill');
    expect(missing).toBeNull();
  });

  test('loadBehaviorSpec loads valid spec', () => {
    const loaded = loadBehaviorSpec('review');
    expect(loaded).not.toBeNull();
    expect(loaded!.skill).toBe('review');
    expect(loaded!.assertions.length).toBeGreaterThan(0);
  });

  test('pattern_exists passes when pattern is found', () => {
    const outcome = checkBehaviors(
      'hello world',
      specFor({ type: 'pattern_exists', pattern: 'hello', description: 'find hello' }),
    );
    expect(outcome.passed).toBe(true);
    expect(outcome.results[0].passed).toBe(true);
  });

  test('pattern_exists fails when pattern is missing', () => {
    const outcome = checkBehaviors(
      'hello world',
      specFor({ type: 'pattern_exists', pattern: 'goodbye', description: 'find goodbye' }),
    );
    expect(outcome.passed).toBe(false);
    expect(outcome.results[0].detail).toContain('not found');
  });

  test('pattern_exists with regex', () => {
    const metricSpec = specFor({
      type: 'pattern_exists',
      pattern: '\\d+ commits',
      regex: true,
      description: 'metric',
    });
    const withMetric = checkBehaviors('Found 42 commits this week', metricSpec);
    const withoutMetric = checkBehaviors('No metrics here', metricSpec);
    expect(withMetric.passed).toBe(true);
    expect(withoutMetric.passed).toBe(false);
  });

  test('pattern_exists with case_insensitive', () => {
    const critSpec = specFor({
      type: 'pattern_exists',
      pattern: 'CRITICAL',
      regex: true,
      case_insensitive: true,
      description: 'crit',
    });
    const lowerCase = checkBehaviors('critical finding: SQL injection', critSpec);
    const upperCase = checkBehaviors('CRITICAL: race condition', critSpec);
    expect(lowerCase.passed).toBe(true);
    expect(upperCase.passed).toBe(true);
  });

  test('pattern_absent passes when pattern is not found', () => {
    const outcome = checkBehaviors('SQL injection in user controller', specFor(noNitpick));
    expect(outcome.passed).toBe(true);
  });

  test('pattern_absent fails when pattern is found', () => {
    const outcome = checkBehaviors(
      'Consider renaming this variable name to something clearer',
      specFor(noNitpick),
    );
    expect(outcome.passed).toBe(false);
    expect(outcome.results[0].detail).toContain('Unwanted');
  });

  test('min_sections counts headings correctly', () => {
    const markdown = `# Title
## Section One
content
## Section Two
content
## Section Three
content`;
    const outcome = checkBehaviors(markdown, specFor(threeSections));
    expect(outcome.passed).toBe(true);
  });

  test('min_sections fails when not enough headings', () => {
    const outcome = checkBehaviors('## Only One\ncontent', specFor(threeSections));
    expect(outcome.passed).toBe(false);
    expect(outcome.results[0].detail).toContain('Found 1');
  });

  test('all spec files parse correctly', () => {
    for (const specName of listBehaviorSpecs()) {
      const loaded = loadBehaviorSpec(specName);
      expect(loaded).not.toBeNull();
      expect(loaded!.skill).toBe(specName);
      expect(loaded!.assertions.length).toBeGreaterThan(0);
      for (const entry of loaded!.assertions) {
        expect(entry.type).toBeDefined();
        expect(entry.description).toBeDefined();
      }
    }
  });

  test('review spec passes on realistic output', () => {
    const reviewSpec = loadBehaviorSpec('review')!;
    const reviewOutput = `# Pre-Landing Review

## CRITICAL Findings

1. SQL injection in user_controller.rb line 15

## INFORMATIONAL Findings

1. Missing index on users.email column
2. Consider adding rate limiting to login endpoint`;
    const outcome = checkBehaviors(reviewOutput, reviewSpec);
    expect(outcome.passed).toBe(true);
  });

  test('retro spec passes on realistic output', () => {
    const retroSpec = loadBehaviorSpec('retro')!;
    const retroOutput = `# Weekly Retrospective

## Summary

15 commits by 3 contributors this week.

## Team Contributions

### Alice
- 8 commits, 2 PRs merged
- Strong work on the auth module

### Bob
- 5 commits focused on testing
- 1 PR merged`;
    const outcome = checkBehaviors(retroOutput, retroSpec);
    expect(outcome.passed).toBe(true);
  });
});
should include diagrams or diagram references" + }, + { + "type": "min_sections", + "heading_level": 2, + "min_count": 2, + "description": "Output must have at least 2 major sections" + } + ] +} diff --git a/test/fixtures/behaviors/qa.json b/test/fixtures/behaviors/qa.json new file mode 100644 index 0000000..c43d69d --- /dev/null +++ b/test/fixtures/behaviors/qa.json @@ -0,0 +1,26 @@ +{ + "skill": "qa", + "assertions": [ + { + "type": "pattern_exists", + "pattern": "bug|issue|finding|error|defect", + "regex": true, + "case_insensitive": true, + "description": "Output must report bugs, issues, or findings" + }, + { + "type": "pattern_exists", + "pattern": "severity|critical|high|medium|low", + "regex": true, + "case_insensitive": true, + "description": "Output must classify findings by severity" + }, + { + "type": "pattern_exists", + "pattern": "screenshot|evidence|console|observed", + "regex": true, + "case_insensitive": true, + "description": "Output must include evidence for findings" + } + ] +} diff --git a/test/fixtures/behaviors/retro.json b/test/fixtures/behaviors/retro.json new file mode 100644 index 0000000..a5bc621 --- /dev/null +++ b/test/fixtures/behaviors/retro.json @@ -0,0 +1,25 @@ +{ + "skill": "retro", + "assertions": [ + { + "type": "pattern_exists", + "pattern": "\\d+\\s*(commit|PR|merge|push)", + "regex": true, + "case_insensitive": true, + "description": "Output must contain quantitative commit or PR metrics" + }, + { + "type": "pattern_exists", + "pattern": "contributor|author|team|person", + "regex": true, + "case_insensitive": true, + "description": "Output must reference contributors or team members" + }, + { + "type": "min_sections", + "heading_level": 2, + "min_count": 2, + "description": "Output must have at least 2 major sections" + } + ] +} diff --git a/test/fixtures/behaviors/review.json b/test/fixtures/behaviors/review.json new file mode 100644 index 0000000..834452b --- /dev/null +++ b/test/fixtures/behaviors/review.json @@ -0,0 +1,24 
@@ +{ + "skill": "review", + "assertions": [ + { + "type": "pattern_exists", + "pattern": "CRITICAL|Critical|critical", + "regex": true, + "description": "Output must contain critical findings or explicitly state none found" + }, + { + "type": "pattern_exists", + "pattern": "INFORMATIONAL|Informational|informational|info", + "regex": true, + "description": "Output must contain informational findings section" + }, + { + "type": "pattern_absent", + "pattern": "variable name|naming convention|import order|whitespace style", + "regex": true, + "case_insensitive": true, + "description": "Output should not nitpick style issues" + } + ] +} diff --git a/test/fixtures/behaviors/ship.json b/test/fixtures/behaviors/ship.json new file mode 100644 index 0000000..8caca1f --- /dev/null +++ b/test/fixtures/behaviors/ship.json @@ -0,0 +1,19 @@ +{ + "skill": "ship", + "assertions": [ + { + "type": "pattern_exists", + "pattern": "PR|pull request|push|branch", + "regex": true, + "case_insensitive": true, + "description": "Output must reference PR creation or branch push" + }, + { + "type": "pattern_exists", + "pattern": "test|passing|passed|green|CI", + "regex": true, + "case_insensitive": true, + "description": "Output must reference test results" + } + ] +} diff --git a/test/helpers/behavior-checker.ts b/test/helpers/behavior-checker.ts new file mode 100644 index 0000000..311dfd7 --- /dev/null +++ b/test/helpers/behavior-checker.ts @@ -0,0 +1,161 @@ +/** + * Behavior assertion runner for skill E2E tests. + * + * Loads JSON behavior specs from test/fixtures/behaviors/ and checks + * that skill output contains (or does not contain) expected patterns. + * Deterministic, free (no API cost), and fast. + * + * Used by test/skill-e2e.test.ts after E2E tests capture output. 
/** A single check to run against captured skill output. */
export interface Assertion {
  /**
   * Which check to run. `section_exists` is currently evaluated exactly like
   * `pattern_exists`.
   */
  type: 'section_exists' | 'pattern_exists' | 'pattern_absent' | 'min_sections';
  /** Literal substring, or regex source when `regex` is true (pattern checks only). */
  pattern?: string;
  /** Treat `pattern` as a regular expression instead of a literal substring. */
  regex?: boolean;
  /** Match `pattern` without regard to letter case. */
  case_insensitive?: boolean;
  /** Number of leading `#` characters for `min_sections` (the checker defaults to 2). */
  heading_level?: number;
  /** Minimum number of headings for `min_sections` (the checker defaults to 1). */
  min_count?: number;
  /** Human-readable statement of what the assertion enforces. */
  description: string;
}

/** The set of assertions that apply to one skill's output. */
export interface BehaviorSpec {
  skill: string;
  assertions: Assertion[];
}

/** Outcome of one assertion; `detail` explains a failure. */
export interface AssertionResult {
  assertion: Assertion;
  passed: boolean;
  detail?: string;
}

/** Aggregate outcome: `passed` is true only when every assertion passed. */
export interface BehaviorCheckResult {
  passed: boolean;
  results: AssertionResult[];
}

const BEHAVIORS_DIR = path.join(__dirname, '..', 'fixtures', 'behaviors');
const SPEC_EXTENSION = '.json';

/** Shallow shape check for parsed spec JSON; individual assertions are vetted when they run. */
function isBehaviorSpec(value: unknown): value is BehaviorSpec {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as { skill?: unknown; assertions?: unknown };
  return typeof candidate.skill === 'string' && Array.isArray(candidate.assertions);
}

/**
 * Load a behavior spec for a skill by name.
 *
 * @param skillName - Base name of the spec file, without the `.json` extension.
 * @returns The parsed spec, or null if no spec file exists.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON lacks a string `skill` or an `assertions` array.
 */
export function loadBehaviorSpec(skillName: string): BehaviorSpec | null {
  const specPath = path.join(BEHAVIORS_DIR, `${skillName}${SPEC_EXTENSION}`);
  if (!fs.existsSync(specPath)) return null;

  // JSON.parse yields untyped data; verify the shape before handing it out as a BehaviorSpec.
  const parsed: unknown = JSON.parse(fs.readFileSync(specPath, 'utf-8'));
  if (!isBehaviorSpec(parsed)) {
    throw new Error(
      `Invalid behavior spec at ${specPath}: expected an object with a string "skill" and an "assertions" array`,
    );
  }
  return parsed;
}

/**
 * List all available behavior specs.
 *
 * @returns Spec names (file names minus the `.json` suffix) in sorted order,
 *   or an empty array when the behaviors directory does not exist.
 */
export function listBehaviorSpecs(): string[] {
  if (!fs.existsSync(BEHAVIORS_DIR)) return [];
  return fs
    .readdirSync(BEHAVIORS_DIR)
    .filter((file) => file.endsWith(SPEC_EXTENSION))
    // Strip only the trailing extension; a string-pattern replace() would remove
    // the first ".json" found anywhere in the name.
    .map((file) => file.slice(0, -SPEC_EXTENSION.length))
    // Directory listing order is platform-dependent; sort for stable output.
    .sort();
}
/**
 * Run all assertions from a behavior spec against the given output text.
 *
 * Every assertion is evaluated (no short-circuit), so the returned `results`
 * array lines up one-to-one with `spec.assertions`. A malformed assertion
 * (missing pattern, invalid regex, bad heading level, unknown type) is
 * reported as a failed result rather than thrown.
 *
 * @param output - Captured skill output to inspect.
 * @param spec - The behavior spec whose assertions should hold.
 * @returns Overall pass flag plus the per-assertion results.
 */
export function checkBehaviors(output: string, spec: BehaviorSpec): BehaviorCheckResult {
  const results = spec.assertions.map((assertion) => runAssertion(output, assertion));
  return {
    passed: results.every((r) => r.passed),
    results,
  };
}

/** Dispatch one assertion to its checker; unknown types fail with a detail message. */
function runAssertion(output: string, assertion: Assertion): AssertionResult {
  switch (assertion.type) {
    // section_exists is deliberately handled as a plain pattern search.
    case 'section_exists':
    case 'pattern_exists':
      return checkPatternExists(output, assertion);
    case 'pattern_absent':
      return checkPatternAbsent(output, assertion);
    case 'min_sections':
      return checkMinSections(output, assertion);
    default:
      // Reachable at runtime because specs are loaded from JSON.
      return { assertion, passed: false, detail: `Unknown assertion type: ${assertion.type}` };
  }
}

/** Result of searching the output for an assertion's pattern. */
type PatternProbe =
  | { ok: true; found: boolean; isRegex: boolean; shown: string }
  | { ok: false; detail: string };

/**
 * Search `output` for the assertion's pattern, honoring `regex` and
 * `case_insensitive`. Returns `ok: false` when the assertion itself is
 * unusable (no pattern, or a regex that does not compile).
 */
function probePattern(output: string, assertion: Assertion): PatternProbe {
  const pattern = assertion.pattern;
  if (!pattern) {
    return { ok: false, detail: 'No pattern specified' };
  }

  const flags = assertion.case_insensitive ? 'i' : '';

  if (assertion.regex) {
    let re: RegExp;
    try {
      re = new RegExp(pattern, flags);
    } catch (err: unknown) {
      // A typo in one spec pattern must not abort the remaining assertions.
      const reason = err instanceof Error ? err.message : String(err);
      return { ok: false, detail: `Invalid regex /${pattern}/${flags}: ${reason}` };
    }
    return { ok: true, found: re.test(output), isRegex: true, shown: `/${pattern}/${flags}` };
  }

  const haystack = assertion.case_insensitive ? output.toLowerCase() : output;
  const needle = assertion.case_insensitive ? pattern.toLowerCase() : pattern;
  return { ok: true, found: haystack.includes(needle), isRegex: false, shown: `"${pattern}"` };
}

/** Passes when the assertion's pattern occurs somewhere in `output`. */
function checkPatternExists(output: string, assertion: Assertion): AssertionResult {
  const probe = probePattern(output, assertion);
  if (!probe.ok) {
    return { assertion, passed: false, detail: probe.detail };
  }
  const missing = probe.isRegex
    ? `Pattern ${probe.shown} not found in output`
    : `${probe.shown} not found in output`;
  return {
    assertion,
    passed: probe.found,
    detail: probe.found ? undefined : missing,
  };
}

/** Passes when the assertion's pattern does not occur anywhere in `output`. */
function checkPatternAbsent(output: string, assertion: Assertion): AssertionResult {
  const probe = probePattern(output, assertion);
  if (!probe.ok) {
    // A broken assertion cannot vouch for absence, so it fails.
    return { assertion, passed: false, detail: probe.detail };
  }
  return {
    assertion,
    passed: !probe.found,
    detail: probe.found ? `Unwanted pattern ${probe.shown} found in output` : undefined,
  };
}

/**
 * Passes when `output` has at least `min_count` (default 1) lines that start
 * with `heading_level` (default 2) `#` characters followed by a space.
 */
function checkMinSections(output: string, assertion: Assertion): AssertionResult {
  const level = assertion.heading_level ?? 2;
  const minCount = assertion.min_count ?? 1;

  // '#'.repeat() throws on negative counts and level 0 would match any
  // space-indented line, so reject those up front as a failed assertion.
  if (!Number.isInteger(level) || level < 1) {
    return {
      assertion,
      passed: false,
      detail: `Invalid heading_level ${level}: expected a positive integer`,
    };
  }

  // The trailing space keeps "## " from also matching deeper "### " headings.
  const prefix = '#'.repeat(level) + ' ';
  const headingCount = output.split('\n').filter((line) => line.startsWith(prefix)).length;
  const enough = headingCount >= minCount;

  return {
    assertion,
    passed: enough,
    detail: enough
      ? undefined
      : `Found ${headingCount} h${level} headings, expected at least ${minCount}`,
  };
}