diff --git a/cli/selftune/evolution/evolve-body.ts b/cli/selftune/evolution/evolve-body.ts index 9dc92bc..2eb638a 100644 --- a/cli/selftune/evolution/evolve-body.ts +++ b/cli/selftune/evolution/evolve-body.ts @@ -84,6 +84,7 @@ export interface EvolveBodyDeps { appendAuditEntry?: typeof import("./audit.js").appendAuditEntry; appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry; buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet; + readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords; readFileSync?: typeof readFileSync; writeFileSync?: (path: string, data: string, encoding: string) => void; } @@ -140,6 +141,8 @@ export async function evolveBody( const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry; const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry; const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet; + const _readEffectiveSkillUsageRecords = + _deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords; const _readFileSync = _deps.readFileSync ?? readFileSync; const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync; @@ -181,6 +184,8 @@ export async function evolveBody( const currentContent = _readFileSync(skillPath, "utf-8"); const parsed = parseSkillSections(currentContent); + const createdAuditDetails = (): string => `original_description:${currentContent}`; + const skillUsage = _readEffectiveSkillUsageRecords(); // Step 2: Load eval set let evalSet: EvalEntry[]; @@ -192,13 +197,11 @@ export async function evolveBody( } evalSet = parsed as EvalEntry[]; } else { - const skillRecords = readEffectiveSkillUsageRecords(); const queryRecords = readJsonl(QUERY_LOG); - evalSet = _buildEvalSet(skillRecords, queryRecords, skillName); + evalSet = _buildEvalSet(skillUsage, queryRecords, skillName); } // Step 3: Load skill usage and extract failure patterns - const skillUsage = readEffectiveSkillUsageRecords(); const failurePatterns = _extractFailurePatterns( evalSet, skillUsage, @@ -265,11 +268,7 @@ export async function evolveBody( lastProposal = proposal; - recordAudit( - proposal.proposal_id, - "created", - `${target} proposal created for ${skillName} (iteration ${iteration + 1})`, - ); + recordAudit(proposal.proposal_id, "created", createdAuditDetails()); recordEvidence({ timestamp: new Date().toISOString(), proposal_id: proposal.proposal_id, diff --git a/cli/selftune/hooks/prompt-log.ts b/cli/selftune/hooks/prompt-log.ts index bf9a185..5dead20 100644 --- a/cli/selftune/hooks/prompt-log.ts +++ b/cli/selftune/hooks/prompt-log.ts @@ -56,7 +56,12 @@ export function processPrompt( raw_source_ref: { event_type: "UserPromptSubmit" }, }; const isActionable = classifyIsActionable(query); - const promptIdentity = reservePromptIdentity(record.session_id, isActionable, promptStatePath); + const promptIdentity = reservePromptIdentity( + record.session_id, + isActionable, + promptStatePath, + canonicalLogPath, + ); const canonical = buildCanonicalPrompt({ ...baseInput, prompt_id: promptIdentity.prompt_id, diff --git a/cli/selftune/hooks/session-stop.ts b/cli/selftune/hooks/session-stop.ts index b83c8fc..588948e 100644 --- a/cli/selftune/hooks/session-stop.ts +++ b/cli/selftune/hooks/session-stop.ts @@ -57,7 +57,7 @@ export function processSessionStop( event_type: "Stop", }, }; - const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath); + const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath, canonicalLogPath); const canonicalSession = buildCanonicalSession({ ...baseInput, diff --git a/cli/selftune/hooks/skill-eval.ts b/cli/selftune/hooks/skill-eval.ts index addb4ec..c22dffb 100644 --- a/cli/selftune/hooks/skill-eval.ts +++ b/cli/selftune/hooks/skill-eval.ts @@ -144,7 +144,7 @@ export function processToolUse( event_type: "PostToolUse", }, }; - const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath); + const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath, canonicalLogPath); const promptId = latestPrompt.last_actionable_prompt_id ?? latestPrompt.last_prompt_id ?? diff --git a/cli/selftune/ingestors/codex-rollout.ts b/cli/selftune/ingestors/codex-rollout.ts index 6d44745..9e00208 100644 --- a/cli/selftune/ingestors/codex-rollout.ts +++ b/cli/selftune/ingestors/codex-rollout.ts @@ -44,6 +44,7 @@ import type { SkillUsageRecord, } from "../types.js"; import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js"; +import { isActionableQueryText } from "../utils/query-filter.js"; const MARKER_FILE = join(homedir(), ".claude", "codex_ingested_rollouts.json"); @@ -159,6 +160,10 @@ export interface ParsedRollout { }; } +function optionalString(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? value : undefined; +} + /** * Parse a Codex rollout JSONL file. * Returns parsed data or null if the file is empty/unparseable. @@ -180,6 +185,7 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR const threadId = basename(path, ".jsonl").replace("rollout-", ""); let prompt = ""; + let lastUserQuery = ""; const toolCalls: Record = {}; const bashCommands: string[] = []; const skillsTriggered: string[] = []; @@ -201,6 +207,22 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR | undefined; let observedSessionId: string | undefined; let observedCwd: string | undefined; + let hasActionablePrompt = false; + const rememberPromptCandidate = (value: unknown): void => { + const message = typeof value === "string" ? value.trim() : ""; + if (!message) return; + lastUserQuery = message; + if (isActionableQueryText(message)) { + if (!hasActionablePrompt) { + prompt = message; + hasActionablePrompt = true; + } + return; + } + if (!prompt) { + prompt = message; + } + }; for (const line of lines) { let event: Record; @@ -215,17 +237,22 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR // --- Observed local rollout format (session_meta, event_msg, turn_context, response_item) --- if (etype === "session_meta") { const payload = (event.payload as Record) ?? {}; - observedSessionId = (payload.id as string) ?? undefined; - observedCwd = (payload.cwd as string) ?? undefined; - const modelProvider = (payload.model_provider as string) ?? undefined; - const model = (payload.model as string) ?? undefined; - const originator = (payload.originator as string) ?? undefined; - observedMeta = { model_provider: modelProvider, model, originator }; + const observedId = optionalString(payload.id); + const observedWorkspace = optionalString(payload.cwd); + const modelProvider = optionalString(payload.model_provider); + const model = optionalString(payload.model); + const originator = optionalString(payload.originator); + if (observedId) observedSessionId = observedId; + if (observedWorkspace) observedCwd = observedWorkspace; + if (!observedMeta) observedMeta = {}; + if (modelProvider) observedMeta.model_provider = modelProvider; + if (model) observedMeta.model = model; + if (originator) observedMeta.originator = originator; } else if (etype === "turn_context") { const payload = (event.payload as Record) ?? {}; - const approvalPolicy = (payload.approval_policy as string) ?? undefined; - const sandboxPolicy = (payload.sandbox_policy as string) ?? undefined; - const model = (payload.model as string) ?? undefined; + const approvalPolicy = optionalString(payload.approval_policy); + const sandboxPolicy = optionalString(payload.sandbox_policy); + const model = optionalString(payload.model); const gitPayload = payload.git as Record | undefined; if (!observedMeta) observedMeta = {}; if (approvalPolicy) observedMeta.approval_policy = approvalPolicy; @@ -233,9 +260,9 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR if (model) observedMeta.model = model; if (gitPayload) { observedMeta.git = { - branch: (gitPayload.branch as string) ?? undefined, - remote: (gitPayload.remote as string) ?? undefined, - commit: (gitPayload.commit as string) ?? (gitPayload.sha as string) ?? undefined, + branch: optionalString(gitPayload.branch), + remote: optionalString(gitPayload.remote), + commit: optionalString(gitPayload.commit) ?? optionalString(gitPayload.sha), }; } turns += 1; @@ -243,8 +270,7 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR const payload = (event.payload as Record) ?? {}; const msgType = (payload.type as string) ?? ""; if (msgType === "user_message") { - const message = (payload.message as string) ?? ""; - if (message && !prompt) prompt = message; + rememberPromptCandidate(payload.message); } // Token usage in event_msg payloads const tokenCount = payload.token_count as Record | undefined; @@ -281,9 +307,7 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR const usage = (event.usage as Record) ?? {}; inputTokens += usage.input_tokens ?? 0; outputTokens += usage.output_tokens ?? 0; - if (!prompt) { - prompt = (event.user_message as string) ?? ""; - } + rememberPromptCandidate(event.user_message); } else if (etype === "turn.failed") { errors += 1; } else if (etype === "item.completed" || etype === "item.started" || etype === "item.updated") { @@ -325,9 +349,7 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR } // Some rollout formats embed the original prompt - if (!prompt && (event.prompt as string)) { - prompt = event.prompt as string; - } + rememberPromptCandidate(event.prompt); } // Infer file date from path structure: .../YYYY/MM/DD/rollout-*.jsonl @@ -366,7 +388,7 @@ export function parseRolloutFile(path: string, skillNames: Set): ParsedR transcript_chars: lines.reduce((sum, l) => sum + l.length, 0), cwd: observedCwd ?? "", transcript_path: path, - last_user_query: prompt, + last_user_query: lastUserQuery || prompt, observed_meta: observedMeta, }; } diff --git a/cli/selftune/ingestors/opencode-ingest.ts b/cli/selftune/ingestors/opencode-ingest.ts index 9b8d4da..32f4aac 100644 --- a/cli/selftune/ingestors/opencode-ingest.ts +++ b/cli/selftune/ingestors/opencode-ingest.ts @@ -37,7 +37,12 @@ import { derivePromptId, deriveSkillInvocationId, } from "../normalization.js"; -import type { CanonicalRecord, QueryLogRecord, SkillUsageRecord } from "../types.js"; +import type { + CanonicalRecord, + QueryLogRecord, + SessionTelemetryRecord, + SkillUsageRecord, +} from "../types.js"; import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js"; const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share"); @@ -64,6 +69,21 @@ interface TriggeredSkillDetection { has_skill_md_read: boolean; } +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function containsWholeSkillMention(text: string, skillName: string): boolean { + const trimmedSkillName = skillName.trim(); + if (!text || !trimmedSkillName) return false; + + const pattern = new RegExp( + `(^|[^A-Za-z0-9_])${escapeRegExp(trimmedSkillName)}([^A-Za-z0-9_]|$)`, + "i", + ); + return pattern.test(text); +} + /** Return skill names from OpenCode skill directories. */ export function findSkillNames(dirs: string[] = OPENCODE_SKILLS_DIRS): Set { const names = new Set(); @@ -302,7 +322,7 @@ export function readSessionsFromSqlite( // Check text content for skill name mentions const textContent = (block.text as string) ?? ""; for (const skillName of skillNames) { - if (textContent.includes(skillName)) { + if (containsWholeSkillMention(textContent, skillName)) { noteSkillDetection(skillName, false); } } @@ -441,7 +461,7 @@ export function readSessionsFromJsonFiles( const text = (block.text as string) ?? ""; for (const skillName of skillNames) { - if (text.includes(skillName)) { + if (containsWholeSkillMention(text, skillName)) { noteSkillDetection(skillName, false); } } @@ -510,7 +530,21 @@ export function writeSession( appendJsonl(queryLogPath, queryRecord, "all_queries"); } - const { query: _q, ...telemetry } = session; + const telemetry: SessionTelemetryRecord = { + timestamp: session.timestamp, + session_id: session.session_id, + cwd: session.cwd, + transcript_path: session.transcript_path, + tool_calls: session.tool_calls, + total_tool_calls: session.total_tool_calls, + bash_commands: session.bash_commands, + skills_triggered: session.skills_triggered, + assistant_turns: session.assistant_turns, + errors_encountered: session.errors_encountered, + transcript_chars: session.transcript_chars, + last_user_query: session.last_user_query, + source: session.source, + }; appendJsonl(telemetryLogPath, telemetry, "session_telemetry"); for (const skillName of skills) { diff --git a/cli/selftune/normalization.ts b/cli/selftune/normalization.ts index d4c363f..48fef65 100644 --- a/cli/selftune/normalization.ts +++ b/cli/selftune/normalization.ts @@ -54,7 +54,15 @@ interface CanonicalPromptSessionState { updated_at: string; } -const PROMPT_STATE_LOCK_TIMEOUT_MS = 5_000; +interface PromptStateLockMetadata { + owner_id: string; + pid: number; + acquired_at: string; + heartbeat_at: string; + state_path: string; +} + +const PROMPT_STATE_LOCK_TIMEOUT_MS = 30_000; const PROMPT_STATE_LOCK_POLL_MS = 25; const PROMPT_STATE_LOCK_SAB = new SharedArrayBuffer(4); const PROMPT_STATE_LOCK_VIEW = new Int32Array(PROMPT_STATE_LOCK_SAB); @@ -139,18 +147,80 @@ function archiveCorruptPromptSessionState(path: string): void { renameSync(path, archivedPath); } +function joinPromptStateLockPath(path: string): string { + return `${path}.lock`; +} + +function joinPromptStateLockMetadataPath(lockPath: string): string { + return `${lockPath}/owner.json`; +} + +function writePromptStateLockMetadata(lockPath: string, ownerId: string, statePath: string): void { + const now = new Date().toISOString(); + const metadataPath = joinPromptStateLockMetadataPath(lockPath); + const metadata: PromptStateLockMetadata = { + owner_id: ownerId, + pid: process.pid, + acquired_at: now, + heartbeat_at: now, + state_path: statePath, + }; + writeFileSync(metadataPath, JSON.stringify(metadata, null, 2), "utf-8"); +} + +function readPromptStateLockMetadata(lockPath: string): PromptStateLockMetadata | null { + const metadataPath = joinPromptStateLockMetadataPath(lockPath); + if (!existsSync(metadataPath)) return null; + + try { + const parsed = JSON.parse(readFileSync(metadataPath, "utf-8")) as PromptStateLockMetadata; + if ( + typeof parsed.owner_id === "string" && + typeof parsed.pid === "number" && + typeof parsed.heartbeat_at === "string" + ) { + return parsed; + } + } catch { + return null; + } + + return null; +} + +function touchPromptStateLock(lockPath: string, ownerId: string, statePath: string): void { + const metadataPath = joinPromptStateLockMetadataPath(lockPath); + const current = readPromptStateLockMetadata(lockPath); + if (current && current.owner_id !== ownerId) return; + + const now = new Date().toISOString(); + const metadata: PromptStateLockMetadata = { + owner_id: ownerId, + pid: process.pid, + acquired_at: current?.acquired_at ?? now, + heartbeat_at: now, + state_path: statePath, + }; + writeFileSync(metadataPath, JSON.stringify(metadata, null, 2), "utf-8"); +} + function loadPromptSessionState( path: string, sessionId: string, + canonicalLogPath: string = CANONICAL_LOG, options?: { archiveCorrupt?: boolean }, ): CanonicalPromptSessionState { if (!existsSync(path)) { - return defaultPromptSessionState(sessionId); + return derivePromptSessionStateFromCanonicalLog(sessionId, canonicalLogPath); } try { const parsed = JSON.parse(readFileSync(path, "utf-8")) as CanonicalPromptSessionState; - if (parsed.session_id === sessionId && typeof parsed.next_prompt_index === "number") { + if ( + parsed.session_id === sessionId && + typeof parsed.next_prompt_index === "number" && + Number.isFinite(parsed.next_prompt_index) + ) { return parsed; } } catch { @@ -165,7 +235,7 @@ function loadPromptSessionState( } } - return derivePromptSessionStateFromCanonicalLog(sessionId); + return derivePromptSessionStateFromCanonicalLog(sessionId, canonicalLogPath); } function savePromptSessionState(path: string, state: CanonicalPromptSessionState): void { @@ -183,8 +253,11 @@ function joinTempStatePath(path: string): string { } function isStaleLock(lockPath: string): boolean { + const metadata = readPromptStateLockMetadata(lockPath); try { - return Date.now() - statSync(lockPath).mtimeMs > PROMPT_STATE_LOCK_TIMEOUT_MS; + const heartbeatAt = metadata ? Date.parse(metadata.heartbeat_at) : statSync(lockPath).mtimeMs; + if (!Number.isFinite(heartbeatAt)) return false; + return Date.now() - heartbeatAt > PROMPT_STATE_LOCK_TIMEOUT_MS; } catch { return false; } @@ -196,12 +269,14 @@ function withPromptStateLock(statePath: string, fn: () => T): T { mkdirSync(dir, { recursive: true }); } - const lockPath = `${statePath}.lock`; + const lockPath = joinPromptStateLockPath(statePath); const deadline = Date.now() + PROMPT_STATE_LOCK_TIMEOUT_MS; + const ownerId = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`; while (true) { try { mkdirSync(lockPath); + writePromptStateLockMetadata(lockPath, ownerId, statePath); break; } catch (error) { const code = (error as NodeJS.ErrnoException).code; @@ -221,9 +296,13 @@ function withPromptStateLock(statePath: string, fn: () => T): T { } try { + touchPromptStateLock(lockPath, ownerId, statePath); return fn(); } finally { - rmSync(lockPath, { recursive: true, force: true }); + const metadata = readPromptStateLockMetadata(lockPath); + if (!metadata || metadata.owner_id === ownerId) { + rmSync(lockPath, { recursive: true, force: true }); + } } } @@ -236,9 +315,12 @@ export function reservePromptIdentity( sessionId: string, isActionable: boolean, statePath: string = canonicalSessionStatePath(sessionId), + canonicalLogPath: string = CANONICAL_LOG, ): CanonicalPromptIdentity { return withPromptStateLock(statePath, () => { - const state = loadPromptSessionState(statePath, sessionId, { archiveCorrupt: true }); + const state = loadPromptSessionState(statePath, sessionId, canonicalLogPath, { + archiveCorrupt: true, + }); const promptIndex = state.next_prompt_index; const promptId = derivePromptId(sessionId, promptIndex); @@ -255,8 +337,9 @@ export function reservePromptIdentity( export function getLatestPromptIdentity( sessionId: string, statePath: string = canonicalSessionStatePath(sessionId), + canonicalLogPath: string = CANONICAL_LOG, ): { last_prompt_id?: string; last_actionable_prompt_id?: string } { - const state = loadPromptSessionState(statePath, sessionId); + const state = loadPromptSessionState(statePath, sessionId, canonicalLogPath); return { last_prompt_id: state.last_prompt_id, last_actionable_prompt_id: state.last_actionable_prompt_id, diff --git a/cli/selftune/repair/skill-usage.ts b/cli/selftune/repair/skill-usage.ts index 013cac0..8684576 100644 --- a/cli/selftune/repair/skill-usage.ts +++ b/cli/selftune/repair/skill-usage.ts @@ -193,21 +193,22 @@ export function rebuildSkillUsageFromTranscripts( } export function cliMain(): void { - const { values } = parseArgs({ - options: { - "projects-dir": { type: "string", default: CLAUDE_CODE_PROJECTS_DIR }, - since: { type: "string" }, - out: { type: "string", default: REPAIRED_SKILL_LOG }, - "sessions-marker": { type: "string", default: REPAIRED_SKILL_SESSIONS_MARKER }, - "skill-log": { type: "string", default: SKILL_LOG }, - "dry-run": { type: "boolean", default: false }, - help: { type: "boolean", default: false }, - }, - strict: true, - }); - - if (values.help) { - console.log(`selftune repair-skill-usage — Rebuild trustworthy skill usage from transcripts + try { + const { values } = parseArgs({ + options: { + "projects-dir": { type: "string", default: CLAUDE_CODE_PROJECTS_DIR }, + since: { type: "string" }, + out: { type: "string", default: REPAIRED_SKILL_LOG }, + "sessions-marker": { type: "string", default: REPAIRED_SKILL_SESSIONS_MARKER }, + "skill-log": { type: "string", default: SKILL_LOG }, + "dry-run": { type: "boolean", default: false }, + help: { type: "boolean", default: false }, + }, + strict: true, + }); + + if (values.help) { + console.log(`selftune repair-skill-usage — Rebuild trustworthy skill usage from transcripts Usage: selftune repair-skill-usage [options] @@ -220,55 +221,53 @@ Options: --skill-log Raw skill usage log path --dry-run Show counts without writing files --help Show this help`); - process.exit(0); - } + process.exit(0); + } - let since: Date | undefined; - if (values.since) { - since = new Date(values.since); - if (Number.isNaN(since.getTime())) { - console.error(`[ERROR] Invalid --since date: ${values.since}`); - process.exit(1); + let since: Date | undefined; + if (values.since) { + since = new Date(values.since); + if (Number.isNaN(since.getTime())) { + throw new Error(`Invalid --since date: ${values.since}`); + } } - } - const transcriptPaths = findTranscriptFiles( - values["projects-dir"] ?? CLAUDE_CODE_PROJECTS_DIR, - since, - ); - const rawSkillRecords = readJsonl(values["skill-log"] ?? SKILL_LOG); - const queryRecords = readJsonl(QUERY_LOG); - const { repairedRecords, repairedSessionIds } = rebuildSkillUsageFromTranscripts( - transcriptPaths, - rawSkillRecords, - ); - - const matchedQueries = new Set( - repairedRecords.map((record) => record.query.toLowerCase().trim()), - ); - const totalReinsQueries = queryRecords.filter( - (record) => typeof record.query === "string" && /\breins\b/i.test(record.query), - ).length; - const totalReinsMatches = repairedRecords.filter((record) => - /\breins\b/i.test(record.query), - ).length; - - const summary = { - transcripts_scanned: transcriptPaths.length, - repaired_sessions: repairedSessionIds.size, - repaired_records: repairedRecords.length, - unique_matched_queries: matchedQueries.size, - reins_queries_seen: totalReinsQueries, - reins_skill_matches: totalReinsMatches, - output: values.out ?? REPAIRED_SKILL_LOG, - }; - - if (values["dry-run"]) { - console.log(JSON.stringify(summary, null, 2)); - return; - } + const transcriptPaths = findTranscriptFiles( + values["projects-dir"] ?? CLAUDE_CODE_PROJECTS_DIR, + since, + ); + const rawSkillRecords = readJsonl(values["skill-log"] ?? SKILL_LOG); + const queryRecords = readJsonl(QUERY_LOG); + const { repairedRecords, repairedSessionIds } = rebuildSkillUsageFromTranscripts( + transcriptPaths, + rawSkillRecords, + ); + + const matchedQueries = new Set( + repairedRecords.map((record) => record.query.toLowerCase().trim()), + ); + const totalReinsQueries = queryRecords.filter( + (record) => typeof record.query === "string" && /\breins\b/i.test(record.query), + ).length; + const totalReinsMatches = repairedRecords.filter((record) => + /\breins\b/i.test(record.query), + ).length; + + const summary = { + transcripts_scanned: transcriptPaths.length, + repaired_sessions: repairedSessionIds.size, + repaired_records: repairedRecords.length, + unique_matched_queries: matchedQueries.size, + reins_queries_seen: totalReinsQueries, + reins_skill_matches: totalReinsMatches, + output: values.out ?? REPAIRED_SKILL_LOG, + }; + + if (values["dry-run"]) { + console.log(JSON.stringify(summary, null, 2)); + return; + } - try { writeRepairedSkillUsageRecords( repairedRecords, repairedSessionIds, @@ -278,9 +277,7 @@ Options: console.log(JSON.stringify(summary, null, 2)); } catch (error) { const message = error instanceof Error ? error.message : String(error); - console.error( - `[ERROR] Failed to write repaired skill usage to ${values.out ?? REPAIRED_SKILL_LOG}: ${message}`, - ); + console.error(`[ERROR] Failed to repair skill usage: ${message}`); process.exit(1); } } diff --git a/tests/evolution/evolve-body.test.ts b/tests/evolution/evolve-body.test.ts index de48bfd..8a44ea5 100644 --- a/tests/evolution/evolve-body.test.ts +++ b/tests/evolution/evolve-body.test.ts @@ -139,6 +139,8 @@ const mockBuildEvalSet = mock( }, ); +const mockReadEffectiveSkillUsageRecords = mock((): SkillUsageRecord[] => []); + const mockWriteFileSync = mock((_path: string, _data: string, _encoding: string) => {}); // --------------------------------------------------------------------------- @@ -156,6 +158,7 @@ function makeDeps(): EvolveBodyDeps { appendAuditEntry: mockAppendAuditEntry, appendEvidenceEntry: mockAppendEvidenceEntry, buildEvalSet: mockBuildEvalSet, + readEffectiveSkillUsageRecords: mockReadEffectiveSkillUsageRecords, writeFileSync: mockWriteFileSync, }; } @@ -221,6 +224,9 @@ afterEach(() => { { query: "unrelated", should_trigger: false }, ]); + mockReadEffectiveSkillUsageRecords.mockReset(); + mockReadEffectiveSkillUsageRecords.mockImplementation(() => []); + mockWriteFileSync.mockReset(); mockWriteFileSync.mockImplementation(() => {}); @@ -357,7 +363,10 @@ describe("evolveBody orchestrator", () => { }); test("audit entries collected throughout flow", async () => { - const opts = makeOptions(); + const originalContent = + "---\nname: test\n---\n\n# Test Skill\nA skill for testing\n\n## Workflow Routing\n\n| Trigger | Workflow |\n| --- | --- |\n| test | run |"; + const { skillPath } = createTempSkill(originalContent); + const opts = makeOptions({ skillPath }); const result = await evolveBody(opts, makeDeps()); expect(result.auditEntries.length).toBeGreaterThanOrEqual(3); @@ -365,6 +374,28 @@ describe("evolveBody orchestrator", () => { expect(actions).toContain("created"); expect(actions).toContain("validated"); expect(actions).toContain("deployed"); + const createdAudit = result.auditEntries.find((entry) => entry.action === "created"); + expect(createdAudit?.details).toBe(`original_description:${originalContent}`); + }); + + test("uses injected skill usage reader", async () => { + const skillUsage: SkillUsageRecord[] = [ + { + timestamp: "2026-03-10T00:00:00.000Z", + session_id: "sess-1", + skill_name: "test-skill", + skill_path: "/tmp/test-skill/SKILL.md", + query: "build the project", + triggered: true, + }, + ]; + mockReadEffectiveSkillUsageRecords.mockImplementation(() => skillUsage); + + await evolveBody(makeOptions(), makeDeps()); + + expect(mockReadEffectiveSkillUsageRecords.mock.calls.length).toBe(1); + expect(mockBuildEvalSet.mock.calls[0]?.[0]).toEqual(skillUsage); + expect(mockExtractFailurePatterns.mock.calls[0]?.[1]).toEqual(skillUsage); }); test("routing target uses routing proposal and validation", async () => { diff --git a/tests/ingestors/codex-rollout.test.ts b/tests/ingestors/codex-rollout.test.ts index dd5a5f3..1de8762 100644 --- a/tests/ingestors/codex-rollout.test.ts +++ b/tests/ingestors/codex-rollout.test.ts @@ -152,6 +152,28 @@ describe("parseRolloutFile", () => { expect(result?.last_user_query).toBe("build the project"); }); + test("keeps the first actionable prompt in multi-turn rollouts", () => { + const codexHome = join(tmpDir, "codex"); + const content = [ + '{"type":"event_msg","payload":{"type":"user_message","message":"Continue from where you left off."}}', + '{"type":"event_msg","payload":{"type":"user_message","message":"build the project"}}', + '{"type":"event_msg","payload":{"type":"user_message","message":"also add deployment checks"}}', + ].join("\n"); + + const path = createRolloutFile( + codexHome, + "2026", + "01", + "01", + "rollout-first-actionable.jsonl", + content, + ); + const result = parseRolloutFile(path, new Set()); + + expect(result?.query).toBe("build the project"); + expect(result?.last_user_query).toBe("also add deployment checks"); + }); + test("detects skill names in completed items", () => { const codexHome = join(tmpDir, "codex"); const content = [ @@ -198,6 +220,8 @@ describe("parseRolloutFile", () => { const content = [ '{"type":"session_meta","payload":{"id":"obs-session-1","cwd":"/project","model_provider":"openai","model":"gpt-4o","originator":"codex-cli"}}', '{"type":"turn_context","payload":{"approval_policy":"auto","sandbox_policy":"container","model":"gpt-4o","git":{"branch":"main","remote":"origin","commit":"abc123"}}}', + '{"type":"event_msg","payload":{"type":"user_message","message":"Continue from where you left off."}}', + '{"type":"session_meta","payload":{"id":"obs-session-1","originator":"codex-cli-secondary"}}', '{"type":"event_msg","payload":{"type":"user_message","message":"Build the project"}}', '{"type":"response_item","payload":{"type":"function_call","name":"write_file","arguments":"{}"}}', '{"type":"response_item","payload":{"type":"agent_reasoning","text":"Let me think about this"}}', @@ -226,11 +250,44 @@ describe("parseRolloutFile", () => { expect(result?.observed_meta).toBeTruthy(); expect(result?.observed_meta?.model_provider).toBe("openai"); expect(result?.observed_meta?.model).toBe("gpt-4o"); + expect(result?.observed_meta?.originator).toBe("codex-cli-secondary"); expect(result?.observed_meta?.approval_policy).toBe("auto"); expect(result?.observed_meta?.sandbox_policy).toBe("container"); expect(result?.observed_meta?.git?.branch).toBe("main"); expect(result?.observed_meta?.git?.commit).toBe("abc123"); }); + + test("ignores non-string observed metadata payload fields", () => { + const codexHome = join(tmpDir, "codex"); + const content = [ + '{"type":"session_meta","payload":{"id":123,"cwd":{"path":"/project"},"model_provider":["openai"],"model":false,"originator":42}}', + '{"type":"turn_context","payload":{"approval_policy":7,"sandbox_policy":{"mode":"container"},"model":["gpt-4o"],"git":{"branch":99,"remote":true,"commit":["abc123"]}}}', + '{"type":"event_msg","payload":{"type":"user_message","message":"Build the project"}}', + ].join("\n"); + + const path = createRolloutFile( + codexHome, + "2026", + "03", + "10", + "rollout-observed-invalid-meta.jsonl", + content, + ); + const result = parseRolloutFile(path, new Set()); + + expect(result?.session_id).toBe("observed-invalid-meta"); + expect(result?.cwd).toBe(""); + expect(result?.query).toBe("Build the project"); + expect(result?.last_user_query).toBe("Build the project"); + expect(result?.observed_meta?.model_provider).toBeUndefined(); + expect(result?.observed_meta?.model).toBeUndefined(); + expect(result?.observed_meta?.originator).toBeUndefined(); + expect(result?.observed_meta?.approval_policy).toBeUndefined(); + expect(result?.observed_meta?.sandbox_policy).toBeUndefined(); + expect(result?.observed_meta?.git?.branch).toBeUndefined(); + expect(result?.observed_meta?.git?.remote).toBeUndefined(); + expect(result?.observed_meta?.git?.commit).toBeUndefined(); + }); }); describe("ingestFile", () => { diff --git a/tests/ingestors/opencode-ingest.test.ts b/tests/ingestors/opencode-ingest.test.ts index 88ce830..f323dc6 100644 --- a/tests/ingestors/opencode-ingest.test.ts +++ b/tests/ingestors/opencode-ingest.test.ts @@ -137,6 +137,39 @@ describe("readSessionsFromSqlite", () => { expect(s.skill_detections).toEqual([{ skill_name: "Deploy", has_skill_md_read: true }]); }); + test("uses whole-word matching for text-only skill mentions", () => { + const dbPath = join(tmpDir, "opencode.db"); + const db = createTestDb(dbPath); + + const created = Date.now(); + db.run("INSERT INTO session (id, title, created, updated) VALUES (?, ?, ?, ?)", [ + "sess-mention", + "Mention test", + created, + created, + ]); + + db.run("INSERT INTO message (id, session_id, role, content, created) VALUES (?, ?, ?, ?, ?)", [ + "msg-1", + "sess-mention", + "user", + JSON.stringify([{ type: "text", text: "Plan the deploy" }]), + created, + ]); + db.run("INSERT INTO message (id, session_id, role, content, created) VALUES (?, ?, ?, ?, ?)", [ + "msg-2", + "sess-mention", + "assistant", + JSON.stringify([{ type: "text", text: "DeploySkill can help here." }]), + created + 1, + ]); + db.close(); + + const sessions = readSessionsFromSqlite(dbPath, null, new Set(["Deploy"])); + expect(sessions[0].skills_triggered).toEqual([]); + expect(sessions[0].skill_detections).toEqual([]); + }); + test("handles OpenAI tool_calls format", () => { const dbPath = join(tmpDir, "opencode.db"); const db = createTestDb(dbPath); @@ -391,6 +424,8 @@ describe("writeSession", () => { const telemetryLines = readFileSync(telemetryLog, "utf-8").trim().split("\n"); const telemetryRecord = JSON.parse(telemetryLines[0]); expect(telemetryRecord.session_id).toBe("sess-oc-1"); + expect(telemetryRecord.skill_detections).toBeUndefined(); + expect(telemetryRecord.is_metadata_only).toBeUndefined(); const skillLines = readFileSync(skillLog, "utf-8").trim().split("\n"); const skillRecord = JSON.parse(skillLines[0]); diff --git a/tests/normalization/normalization.test.ts b/tests/normalization/normalization.test.ts index e81353a..83b3afa 100644 --- a/tests/normalization/normalization.test.ts +++ b/tests/normalization/normalization.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import { mkdtempSync, rmSync } from "node:fs"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { @@ -179,6 +179,60 @@ describe("ID derivation", () => { rmSync(dir, { recursive: true, force: true }); } }); + + test("recovers prompt state from a custom canonical log path", () => { + const dir = mkdtempSync(join(tmpdir(), "selftune-normalization-recovery-")); + const statePath = join(dir, "canonical-session-state.json"); + const canonicalLogPath = join(dir, "canonical.jsonl"); + + try { + writeFileSync( + canonicalLogPath, + `${[ + JSON.stringify( + buildCanonicalPrompt({ + platform: "claude_code", + capture_mode: "hook", + source_session_kind: "interactive", + session_id: "sess-456", + raw_source_ref: { event_type: "UserPromptSubmit" }, + prompt_id: "sess-456:p0", + occurred_at: "2026-03-10T00:00:00Z", + prompt_text: "Fix the auth bug", + prompt_index: 0, + is_actionable: true, + }), + ), + JSON.stringify( + buildCanonicalPrompt({ + platform: "claude_code", + capture_mode: "hook", + source_session_kind: "interactive", + session_id: "sess-456", + raw_source_ref: { event_type: "UserPromptSubmit" }, + prompt_id: "sess-456:p1", + occurred_at: "2026-03-10T00:00:01Z", + prompt_text: "done", + prompt_index: 1, + is_actionable: false, + }), + ), + ].join("\n")}\n`, + "utf-8", + ); + + expect(getLatestPromptIdentity("sess-456", statePath, canonicalLogPath)).toEqual({ + last_prompt_id: "sess-456:p1", + last_actionable_prompt_id: "sess-456:p0", + }); + expect(reservePromptIdentity("sess-456", true, statePath, canonicalLogPath)).toEqual({ + prompt_id: "sess-456:p2", + prompt_index: 2, + }); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); describe("buildCanonicalSession", () => {