Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions cli/selftune/evolution/evolve-body.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export interface EvolveBodyDeps {
appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry;
buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords;
readFileSync?: typeof readFileSync;
writeFileSync?: (path: string, data: string, encoding: string) => void;
}
Expand Down Expand Up @@ -140,6 +141,8 @@ export async function evolveBody(
const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry;
const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
const _readEffectiveSkillUsageRecords =
_deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords;
const _readFileSync = _deps.readFileSync ?? readFileSync;
const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;

Expand Down Expand Up @@ -181,6 +184,8 @@ export async function evolveBody(

const currentContent = _readFileSync(skillPath, "utf-8");
const parsed = parseSkillSections(currentContent);
const createdAuditDetails = (): string => `original_description:${currentContent}`;
const skillUsage = _readEffectiveSkillUsageRecords();

// Step 2: Load eval set
let evalSet: EvalEntry[];
Expand All @@ -192,13 +197,11 @@ export async function evolveBody(
}
evalSet = parsed as EvalEntry[];
} else {
const skillRecords = readEffectiveSkillUsageRecords();
const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
evalSet = _buildEvalSet(skillRecords, queryRecords, skillName);
evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
}

// Step 3: Load skill usage and extract failure patterns
const skillUsage = readEffectiveSkillUsageRecords();
const failurePatterns = _extractFailurePatterns(
evalSet,
skillUsage,
Expand Down Expand Up @@ -265,11 +268,7 @@ export async function evolveBody(

lastProposal = proposal;

recordAudit(
proposal.proposal_id,
"created",
`${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
);
recordAudit(proposal.proposal_id, "created", createdAuditDetails());
recordEvidence({
timestamp: new Date().toISOString(),
proposal_id: proposal.proposal_id,
Expand Down
7 changes: 6 additions & 1 deletion cli/selftune/hooks/prompt-log.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ export function processPrompt(
raw_source_ref: { event_type: "UserPromptSubmit" },
};
const isActionable = classifyIsActionable(query);
const promptIdentity = reservePromptIdentity(record.session_id, isActionable, promptStatePath);
const promptIdentity = reservePromptIdentity(
record.session_id,
isActionable,
promptStatePath,
canonicalLogPath,
);
const canonical = buildCanonicalPrompt({
...baseInput,
prompt_id: promptIdentity.prompt_id,
Expand Down
2 changes: 1 addition & 1 deletion cli/selftune/hooks/session-stop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export function processSessionStop(
event_type: "Stop",
},
};
const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath);
const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath, canonicalLogPath);

const canonicalSession = buildCanonicalSession({
...baseInput,
Expand Down
2 changes: 1 addition & 1 deletion cli/selftune/hooks/skill-eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ export function processToolUse(
event_type: "PostToolUse",
},
};
const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath);
const latestPrompt = getLatestPromptIdentity(sessionId, promptStatePath, canonicalLogPath);
const promptId =
latestPrompt.last_actionable_prompt_id ??
latestPrompt.last_prompt_id ??
Expand Down
64 changes: 43 additions & 21 deletions cli/selftune/ingestors/codex-rollout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import type {
SkillUsageRecord,
} from "../types.js";
import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";
import { isActionableQueryText } from "../utils/query-filter.js";

const MARKER_FILE = join(homedir(), ".claude", "codex_ingested_rollouts.json");

Expand Down Expand Up @@ -159,6 +160,10 @@ export interface ParsedRollout {
};
}

/**
 * Narrow an arbitrary value to a non-blank string.
 *
 * @param value - Any value, typically a field read from an untyped payload.
 * @returns `value` itself (unchanged, not trimmed) when it is a string that
 *   contains at least one non-whitespace character; otherwise `undefined`.
 */
function optionalString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  // Whitespace-only strings are treated the same as a missing value.
  const hasVisibleContent = value.trim().length > 0;
  return hasVisibleContent ? value : undefined;
}

/**
* Parse a Codex rollout JSONL file.
* Returns parsed data or null if the file is empty/unparseable.
Expand All @@ -180,6 +185,7 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR

const threadId = basename(path, ".jsonl").replace("rollout-", "");
let prompt = "";
let lastUserQuery = "";
const toolCalls: Record<string, number> = {};
const bashCommands: string[] = [];
const skillsTriggered: string[] = [];
Expand All @@ -201,6 +207,22 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
| undefined;
let observedSessionId: string | undefined;
let observedCwd: string | undefined;
let hasActionablePrompt = false;
// Record a candidate user message seen in the rollout.
// - Non-string or blank values are ignored.
// - Every non-blank message overwrites `lastUserQuery`.
// - `prompt` is set to the first message that isActionableQueryText accepts;
//   until one is seen, it holds the first non-blank message of any kind.
const rememberPromptCandidate = (value: unknown): void => {
  if (typeof value !== "string") return;
  const message = value.trim();
  if (!message) return;

  lastUserQuery = message;

  const actionable = isActionableQueryText(message);
  if (actionable && !hasActionablePrompt) {
    // First actionable message wins and replaces any earlier fallback.
    prompt = message;
    hasActionablePrompt = true;
  } else if (!actionable && !prompt) {
    // Fallback: keep the first message when nothing has been stored yet.
    prompt = message;
  }
};

for (const line of lines) {
let event: Record<string, unknown>;
Expand All @@ -215,36 +237,40 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
// --- Observed local rollout format (session_meta, event_msg, turn_context, response_item) ---
if (etype === "session_meta") {
const payload = (event.payload as Record<string, unknown>) ?? {};
observedSessionId = (payload.id as string) ?? undefined;
observedCwd = (payload.cwd as string) ?? undefined;
const modelProvider = (payload.model_provider as string) ?? undefined;
const model = (payload.model as string) ?? undefined;
const originator = (payload.originator as string) ?? undefined;
observedMeta = { model_provider: modelProvider, model, originator };
const observedId = optionalString(payload.id);
const observedWorkspace = optionalString(payload.cwd);
const modelProvider = optionalString(payload.model_provider);
const model = optionalString(payload.model);
const originator = optionalString(payload.originator);
if (observedId) observedSessionId = observedId;
if (observedWorkspace) observedCwd = observedWorkspace;
if (!observedMeta) observedMeta = {};
if (modelProvider) observedMeta.model_provider = modelProvider;
if (model) observedMeta.model = model;
if (originator) observedMeta.originator = originator;
} else if (etype === "turn_context") {
const payload = (event.payload as Record<string, unknown>) ?? {};
const approvalPolicy = (payload.approval_policy as string) ?? undefined;
const sandboxPolicy = (payload.sandbox_policy as string) ?? undefined;
const model = (payload.model as string) ?? undefined;
const approvalPolicy = optionalString(payload.approval_policy);
const sandboxPolicy = optionalString(payload.sandbox_policy);
const model = optionalString(payload.model);
const gitPayload = payload.git as Record<string, unknown> | undefined;
if (!observedMeta) observedMeta = {};
if (approvalPolicy) observedMeta.approval_policy = approvalPolicy;
if (sandboxPolicy) observedMeta.sandbox_policy = sandboxPolicy;
if (model) observedMeta.model = model;
if (gitPayload) {
observedMeta.git = {
branch: (gitPayload.branch as string) ?? undefined,
remote: (gitPayload.remote as string) ?? undefined,
commit: (gitPayload.commit as string) ?? (gitPayload.sha as string) ?? undefined,
branch: optionalString(gitPayload.branch),
remote: optionalString(gitPayload.remote),
commit: optionalString(gitPayload.commit) ?? optionalString(gitPayload.sha),
};
}
turns += 1;
} else if (etype === "event_msg") {
const payload = (event.payload as Record<string, unknown>) ?? {};
const msgType = (payload.type as string) ?? "";
if (msgType === "user_message") {
const message = (payload.message as string) ?? "";
if (message && !prompt) prompt = message;
rememberPromptCandidate(payload.message);
}
// Token usage in event_msg payloads
const tokenCount = payload.token_count as Record<string, number> | undefined;
Expand Down Expand Up @@ -281,9 +307,7 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
const usage = (event.usage as Record<string, number>) ?? {};
inputTokens += usage.input_tokens ?? 0;
outputTokens += usage.output_tokens ?? 0;
if (!prompt) {
prompt = (event.user_message as string) ?? "";
}
rememberPromptCandidate(event.user_message);
} else if (etype === "turn.failed") {
errors += 1;
} else if (etype === "item.completed" || etype === "item.started" || etype === "item.updated") {
Expand Down Expand Up @@ -325,9 +349,7 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
}

// Some rollout formats embed the original prompt
if (!prompt && (event.prompt as string)) {
prompt = event.prompt as string;
}
rememberPromptCandidate(event.prompt);
}

// Infer file date from path structure: .../YYYY/MM/DD/rollout-*.jsonl
Expand Down Expand Up @@ -366,7 +388,7 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
transcript_chars: lines.reduce((sum, l) => sum + l.length, 0),
cwd: observedCwd ?? "",
transcript_path: path,
last_user_query: prompt,
last_user_query: lastUserQuery || prompt,
observed_meta: observedMeta,
};
}
Expand Down
42 changes: 38 additions & 4 deletions cli/selftune/ingestors/opencode-ingest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ import {
derivePromptId,
deriveSkillInvocationId,
} from "../normalization.js";
import type { CanonicalRecord, QueryLogRecord, SkillUsageRecord } from "../types.js";
import type {
CanonicalRecord,
QueryLogRecord,
SessionTelemetryRecord,
SkillUsageRecord,
} from "../types.js";
import { appendJsonl, loadMarker, saveMarker } from "../utils/jsonl.js";

const XDG_DATA_HOME = process.env.XDG_DATA_HOME ?? join(homedir(), ".local", "share");
Expand All @@ -64,6 +69,21 @@ interface TriggeredSkillDetection {
has_skill_md_read: boolean;
}

/**
 * Escape regular-expression metacharacters in a string so it can be embedded
 * in a `RegExp` source and matched literally.
 *
 * @param value - Raw text to escape.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const regexMetaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" in the replacement stands for the matched character itself.
  const escaped = value.replace(regexMetaCharacters, "\\$&");
  return escaped;
}

/**
 * Test whether `text` mentions `skillName` as a standalone token.
 *
 * The skill name is trimmed, regex-escaped, and matched case-insensitively.
 * A match requires that the name is preceded by the start of the text or a
 * character outside `[A-Za-z0-9_]`, and followed by such a character or the
 * end of the text.
 *
 * @param text - Text to search.
 * @param skillName - Skill name to look for; surrounding whitespace is ignored.
 * @returns `true` on a standalone match; `false` when there is none or when
 *   either `text` or the trimmed skill name is empty.
 */
function containsWholeSkillMention(text: string, skillName: string): boolean {
  const needle = skillName.trim();
  if (!text || !needle) {
    return false;
  }

  const escapedNeedle = escapeRegExp(needle);
  const mention = new RegExp(
    `(^|[^A-Za-z0-9_])${escapedNeedle}([^A-Za-z0-9_]|$)`,
    "i",
  );
  return mention.test(text);
}

/** Return skill names from OpenCode skill directories. */
export function findSkillNames(dirs: string[] = OPENCODE_SKILLS_DIRS): Set<string> {
const names = new Set<string>();
Expand Down Expand Up @@ -302,7 +322,7 @@ export function readSessionsFromSqlite(
// Check text content for skill name mentions
const textContent = (block.text as string) ?? "";
for (const skillName of skillNames) {
if (textContent.includes(skillName)) {
if (containsWholeSkillMention(textContent, skillName)) {
noteSkillDetection(skillName, false);
}
}
Expand Down Expand Up @@ -441,7 +461,7 @@ export function readSessionsFromJsonFiles(

const text = (block.text as string) ?? "";
for (const skillName of skillNames) {
if (text.includes(skillName)) {
if (containsWholeSkillMention(text, skillName)) {
noteSkillDetection(skillName, false);
}
}
Expand Down Expand Up @@ -510,7 +530,21 @@ export function writeSession(
appendJsonl(queryLogPath, queryRecord, "all_queries");
}

const { query: _q, ...telemetry } = session;
const telemetry: SessionTelemetryRecord = {
timestamp: session.timestamp,
session_id: session.session_id,
cwd: session.cwd,
transcript_path: session.transcript_path,
tool_calls: session.tool_calls,
total_tool_calls: session.total_tool_calls,
bash_commands: session.bash_commands,
skills_triggered: session.skills_triggered,
assistant_turns: session.assistant_turns,
errors_encountered: session.errors_encountered,
transcript_chars: session.transcript_chars,
last_user_query: session.last_user_query,
source: session.source,
};
appendJsonl(telemetryLogPath, telemetry, "session_telemetry");

for (const skillName of skills) {
Expand Down
Loading