Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions lib/health-score.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,19 @@
//
// ── How the score is computed (READ THIS BEFORE EDITING) ────────────────
//
// A fresh-session guard runs first, then two independent classifiers run
// in sequence and the more severe of the two wins:
//
// 0. FRESH-SESSION GUARD. Any conversation with turnCount at or below
// FRESH_SESSION_TURN_CEIL AND contextPct strictly below
// FRESH_SESSION_CONTEXT_CEIL is returned as Healthy by
// computeHealthScore before any derived signal is consulted. This
// blocks stale state (growthRate from a prior conversation, leaked
// isDetailHeavy, any signal added inside this function later) from
// escalating a session that has no real history yet. The contract is
// "fresh = Healthy", measured against truth (real turn count, real
// context fill), not against derived signals. The guard covers only
// computeHealthScore itself: wrappers like escalateForProjection
// still run on the returned HealthScore and can escalate it later if
// a real draft is active.
//
// 1. PRIMARY (per-model utilization). The conversation's context % is
// compared to model-specific warn / critical thresholds from
Expand Down Expand Up @@ -107,6 +118,27 @@ export const FAST_GROWTH_PCT = 8;
*/
export const TURN_AWARE_WARN_OFFSET = 10;

/**
 * Fresh-session guard: turn-count ceiling (inclusive).
 *
 * A conversation at or below this turn count AND strictly below
 * FRESH_SESSION_CONTEXT_CEIL context % is Healthy regardless of any
 * derived signal (growthRate, isDetailHeavy, future projection wrappers).
 * Both conditions must hold; either one alone does not trigger the guard.
 *
 * Rationale: the U-shaped attention research that drives every other rule
 * here requires meaningful turn count and meaningful context fill to make
 * a confident prediction. At zero or near-zero of both, every secondary
 * signal is noise. We refuse to coach on noise. The ceilings come from
 * GET-36 acceptance criteria: turnCount <= 2 AND contextPct < 30.
 *
 * The turnCount ceiling is inclusive: a turn-2 reply on a brand-new chat
 * is still indistinguishable from a turn-1 or turn-0 state for our
 * purposes.
 */
export const FRESH_SESSION_TURN_CEIL = 2;

/**
 * Fresh-session guard: context-fill ceiling, in percent (exclusive).
 *
 * Paired with FRESH_SESSION_TURN_CEIL; the guard applies only while
 * contextPct is strictly below this value.
 *
 * The contextPct ceiling is exclusive on purpose. Any conversation that
 * has reached 30% of the model's window has enough context fill that the
 * per-model warn / critical thresholds (which start at 50% on the most
 * conservative profile) deserve their full classifier pass.
 *
 * NOTE(review): the unit tests for this guard describe a detail-heavy
 * adjustment that moves the Sonnet 4.5 warn threshold from 50 to 35, so
 * "start at 50%" appears to describe the unadjusted thresholds only.
 * 35 is still above this ceiling; confirm no profile can warn below 30.
 */
export const FRESH_SESSION_CONTEXT_CEIL = 30;

// ── Score computation ─────────────────────────────────────────────────────────

export interface HealthInput {
Expand Down Expand Up @@ -144,6 +176,21 @@ export interface HealthInput {
export function computeHealthScore(input: HealthInput): HealthScore {
const { contextPct, turnCount, growthRate, model, isDetailHeavy } = input;

// Rule 0 (fresh-session guard): a session with too few turns AND too
// little context fill cannot be in any rot zone we can confidently
// claim. Return Healthy before the secondary signals (growthRate,
// turn-count rules) get a chance to fire on noise. The model-aware
// coaching string still names the model so the copy stays consistent
// with what the user sees once the conversation matures.
if (turnCount <= FRESH_SESSION_TURN_CEIL && contextPct < FRESH_SESSION_CONTEXT_CEIL) {
return {
level: 'healthy',
label: 'Healthy',
coaching: getRotCoaching(model, contextPct, isDetailHeavy),
contextPct,
};
}

const profile = getRotProfile(model);
const thresholds = getEffectiveThresholds(model, isDetailHeavy);
const zone = getRotZone(model, contextPct, isDetailHeavy);
Expand Down
160 changes: 160 additions & 0 deletions tests/unit/health-score.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
TURN_DEGRADING_CEIL,
TURN_CRITICAL_CEIL,
FAST_GROWTH_PCT,
FRESH_SESSION_TURN_CEIL,
FRESH_SESSION_CONTEXT_CEIL,
type HealthInput,
} from '../../lib/health-score';

Expand Down Expand Up @@ -164,6 +166,164 @@ describe('healthy', () => {
});
});

// ── Fresh-session guard (GET-36) ─────────────────────────────────────────────
//
// A conversation with turnCount <= 2 AND contextPct < 30 must return
// Healthy regardless of any derived signal that may have leaked from a
// prior conversation, tab, or session. The guard runs before the
// per-model classifier, so growthRate, isDetailHeavy, and any other
// signal evaluated inside computeHealthScore cannot escalate fresh
// sessions. Wrappers applied to the returned score afterwards are outside
// the guard's scope.

describe('fresh-session guard (GET-36)', () => {
// Suite layout: the first five cases assert the guard fires (result is
// Healthy); the "does NOT ..." cases assert it steps aside; the final
// case checks the coaching copy produced on the guard path.
//
// The guard condition under test is
// turnCount <= FRESH_SESSION_TURN_CEIL && contextPct < FRESH_SESSION_CONTEXT_CEIL.
it('returns Healthy on completely empty input', () => {
// Builds the HealthInput literal directly instead of going through the
// input() helper, so every field is explicit, including an empty
// model string and a null growthRate.
const h = computeHealthScore({
contextPct: 0,
turnCount: 0,
growthRate: null,
model: '',
isDetailHeavy: false,
});
expect(h.level).toBe('healthy');
expect(h.label).toBe('Healthy');
});

it('returns Healthy at the upper boundary (turnCount=2, contextPct=29.9)', () => {
// Inclusive turn ceiling and a value just under the exclusive context
// ceiling: the largest inputs for which the guard should still fire.
// NOTE(review): input() is defined outside this view; presumably it
// fills unspecified fields with defaults. Verify those defaults would
// not make this case Healthy even without the guard.
const h = computeHealthScore(input({
turnCount: FRESH_SESSION_TURN_CEIL,
contextPct: FRESH_SESSION_CONTEXT_CEIL - 0.1,
}));
expect(h.level).toBe('healthy');
});

it('ignores stale large growthRate when turns and context are fresh', () => {
// Simulates state leak: prior conversation populated growthRate to
// a huge value before SPA navigation. The guard must still return
// Healthy because turnCount and contextPct are below the ceilings.
const h = computeHealthScore(input({
turnCount: 1,
contextPct: 20,
growthRate: 999,
}));
expect(h.level).toBe('healthy');
});

it('ignores stale isDetailHeavy=true when turns and context are fresh', () => {
// Simulates leak of lastDetailHeavy from a prior conversation.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 1,
contextPct: 25,
isDetailHeavy: true,
}));
expect(h.level).toBe('healthy');
});

it('handles negative contextPct from buggy upstream as fresh', () => {
// Defensive: malformed pricing.json or zero-window fallback could
// yield negative percentages. Treat as fresh, do not crash.
// Only the level is asserted; the negative value is not checked for
// clamping in the returned score.
const h = computeHealthScore(input({
turnCount: 1,
contextPct: -5,
}));
expect(h.level).toBe('healthy');
});

it('does NOT apply at turnCount=3 (just past ceiling)', () => {
// Past the turn ceiling -> fast growth secondary rule fires as
// before the guard existed.
//
// NOTE(review): contextPct=35 is already at or above
// FRESH_SESSION_CONTEXT_CEIL, so the guard is skipped here no matter
// what turnCount is. This case therefore does not isolate the turn
// ceiling: it would still pass if the turn check were removed or
// off by one. Consider a companion case with contextPct below the
// context ceiling so that turnCount alone decides the outcome.
const h = computeHealthScore(input({
turnCount: FRESH_SESSION_TURN_CEIL + 1,
contextPct: 35,
growthRate: FAST_GROWTH_PCT + 1,
}));
expect(h.level).toBe('degrading');
});

it('does NOT apply at contextPct=30 exactly (ceiling is exclusive)', () => {
// Verifies the guard's contextPct ceiling is exclusive: at
// contextPct=FRESH_SESSION_CONTEXT_CEIL the guard does NOT fire.
// On Sonnet 4.5 (warn=50), 30% is still healthy by the primary
// classifier, so the level assertion is the boundary check.
//
// The coaching match is a shape check, not proof of branch:
// both the guard and the primary fall-through call
// getRotCoaching(model, contextPct, isDetailHeavy) with the
// same arguments, so the string is identical from either path.
// We assert it cites the model only to confirm a non-empty,
// model-aware coaching string was produced.
//
// NOTE(review): because both paths yield 'healthy' here, the level
// assertion also cannot tell the two branches apart; this case
// documents the boundary rather than proving it.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 1,
contextPct: FRESH_SESSION_CONTEXT_CEIL,
}));
expect(h.level).toBe('healthy');
expect(h.coaching).toMatch(/Sonnet 4\.5/);
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

it('does NOT apply when contextPct hits the absolute critical floor with low turns', () => {
// 95% on turn 1 (huge first prompt + system + RAG): the guard
// requires BOTH conditions, contextPct=95 fails the < 30 check,
// so the absolute critical floor (Rule 1) wins.
const h = computeHealthScore(input({
turnCount: 1,
contextPct: 95,
}));
expect(h.level).toBe('critical');
});

it('does NOT mask high context with low turns (untracked old chat)', () => {
// User opens a pre-existing claude.ai conversation that LCO never
// tracked. After the first observed turn, contextPct may already
// be 70%. Guard must not fire. On Sonnet 4.5 (warn=50, crit=75),
// 70% is degrading.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 1,
contextPct: 70,
}));
expect(h.level).toBe('degrading');
});

it('does NOT mask warn-boundary first turn (heavy first prompt)', () => {
// Brand-new chat with a heavy initial prompt + system + RAG: the
// first turn lands at the per-model warn threshold. The user
// deserves the warning even on turn 1.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 1,
contextPct: 50,
}));
expect(h.level).toBe('degrading');
});

it('does NOT mask high context with detail-heavy on first turn', () => {
// Detail-heavy shifts Sonnet 4.5 warn from 50 to 35. A turn-1
// chat at 40% with a precision keyword should still warn.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 1,
contextPct: 40,
isDetailHeavy: true,
}));
expect(h.level).toBe('degrading');
});

it('produces model-aware coaching string when guard fires with a known model', () => {
// The guard reuses getRotCoaching, which on a healthy zone with
// contextPct < LOW_CONTEXT_REASSURANCE_CEIL returns the "fresh
// and responsive" copy. Verify the string actually came back
// shaped, not empty.
const h = computeHealthScore(input({
model: SONNET_45,
turnCount: 0,
contextPct: 5,
}));
expect(h.level).toBe('healthy');
expect(h.coaching.length).toBeGreaterThan(0);
expect(h.coaching).toMatch(/fresh/i);
});
});

// ── Detail-heavy adjustment ──────────────────────────────────────────────────

describe('detail-heavy adjustment', () => {
Expand Down
Loading